wgit 0.5.1 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +7 -0
- data/CHANGELOG.md +249 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +21 -0
- data/LICENSE.txt +21 -0
- data/README.md +232 -0
- data/bin/wgit +39 -0
- data/lib/wgit.rb +3 -1
- data/lib/wgit/assertable.rb +3 -3
- data/lib/wgit/base.rb +30 -0
- data/lib/wgit/core_ext.rb +1 -1
- data/lib/wgit/crawler.rb +304 -148
- data/lib/wgit/database/database.rb +310 -135
- data/lib/wgit/database/model.rb +10 -3
- data/lib/wgit/document.rb +241 -169
- data/lib/wgit/{document_extensions.rb → document_extractors.rb} +20 -10
- data/lib/wgit/dsl.rb +324 -0
- data/lib/wgit/indexer.rb +68 -156
- data/lib/wgit/response.rb +17 -14
- data/lib/wgit/url.rb +213 -73
- data/lib/wgit/utils.rb +32 -20
- data/lib/wgit/version.rb +3 -2
- metadata +38 -19
data/lib/wgit/response.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module Wgit
|
2
|
-
# Response class
|
2
|
+
# Response class modeling a generic HTTP GET response.
|
3
3
|
class Response
|
4
4
|
# The underlying HTTP adapter/library response object.
|
5
5
|
attr_accessor :adapter_response
|
@@ -56,11 +56,11 @@ module Wgit
|
|
56
56
|
@body.empty? ? nil : @body
|
57
57
|
end
|
58
58
|
|
59
|
-
# Returns
|
59
|
+
# Returns whether or not a server response is absent.
|
60
60
|
#
|
61
|
-
# @return [Boolean] True if
|
61
|
+
# @return [Boolean] True if the status is nil or < 1, false otherwise.
|
62
62
|
def failure?
|
63
|
-
!success?
|
63
|
+
!success?
|
64
64
|
end
|
65
65
|
|
66
66
|
# Sets the headers Hash to the given value. The header keys are mapped
|
@@ -69,7 +69,10 @@ module Wgit
|
|
69
69
|
# @param headers [Hash] The new response headers.
|
70
70
|
# @return [Hash] @headers's new value.
|
71
71
|
def headers=(headers)
|
72
|
-
|
72
|
+
unless headers
|
73
|
+
@headers = {}
|
74
|
+
return
|
75
|
+
end
|
73
76
|
|
74
77
|
@headers = headers.map do |k, v|
|
75
78
|
k = k.downcase.gsub('-', '_').to_sym
|
@@ -122,20 +125,20 @@ module Wgit
|
|
122
125
|
@status = int.positive? ? int : nil
|
123
126
|
end
|
124
127
|
|
125
|
-
# Returns whether or not
|
128
|
+
# Returns whether or not a server response is present.
|
126
129
|
#
|
127
|
-
# @return [Boolean] True if
|
130
|
+
# @return [Boolean] True if the status is > 0, false otherwise.
|
128
131
|
def success?
|
129
132
|
return false unless @status
|
130
133
|
|
131
|
-
@status.
|
134
|
+
@status.positive?
|
132
135
|
end
|
133
136
|
|
134
|
-
alias code
|
135
|
-
alias content
|
136
|
-
alias
|
137
|
-
alias to_s
|
138
|
-
alias redirects
|
139
|
-
alias length
|
137
|
+
alias code status
|
138
|
+
alias content body
|
139
|
+
alias crawl_duration total_time
|
140
|
+
alias to_s body
|
141
|
+
alias redirects redirections
|
142
|
+
alias length size
|
140
143
|
end
|
141
144
|
end
|
data/lib/wgit/url.rb
CHANGED
@@ -6,20 +6,20 @@ require 'uri'
|
|
6
6
|
require 'addressable/uri'
|
7
7
|
|
8
8
|
module Wgit
|
9
|
-
# Class modeling a web based HTTP URL.
|
9
|
+
# Class modeling/serialising a web based HTTP URL.
|
10
10
|
#
|
11
11
|
# Can be an internal/relative link e.g. "about.html" or an absolute URL
|
12
|
-
# e.g. "http://www.google.co.uk". Is a subclass of String and uses
|
13
|
-
#
|
12
|
+
# e.g. "http://www.google.co.uk". Is a subclass of String and uses `URI` and
|
13
|
+
# `addressable/uri` internally for parsing.
|
14
14
|
#
|
15
|
-
# Most of the methods in this class return new Wgit::Url instances making
|
16
|
-
# method calls chainable e.g. url.omit_base.omit_fragment etc. The
|
17
|
-
# also try to be idempotent where possible.
|
15
|
+
# Most of the methods in this class return new `Wgit::Url` instances making
|
16
|
+
# the method calls chainable e.g. `url.omit_base.omit_fragment` etc. The
|
17
|
+
# methods also try to be idempotent where possible.
|
18
18
|
class Url < String
|
19
19
|
include Assertable
|
20
20
|
|
21
21
|
# Whether the Url has been crawled or not. A custom crawled= method
|
22
|
-
# is provided by this class
|
22
|
+
# is provided by this class.
|
23
23
|
attr_reader :crawled
|
24
24
|
|
25
25
|
# The Time stamp of when this Url was crawled.
|
@@ -28,10 +28,10 @@ module Wgit
|
|
28
28
|
# The duration of the crawl for this Url (in seconds).
|
29
29
|
attr_accessor :crawl_duration
|
30
30
|
|
31
|
-
# Initializes a new instance of Wgit::Url which
|
31
|
+
# Initializes a new instance of Wgit::Url which models a web based
|
32
32
|
# HTTP URL.
|
33
33
|
#
|
34
|
-
# @param url_or_obj [String, Wgit::Url,
|
34
|
+
# @param url_or_obj [String, Wgit::Url, #fetch#[]] Is either a String
|
35
35
|
# based URL or an object representing a Database record e.g. a MongoDB
|
36
36
|
# document/object.
|
37
37
|
# @param crawled [Boolean] Whether or not the HTML of the URL's web page
|
@@ -90,6 +90,23 @@ module Wgit
|
|
90
90
|
obj.is_a?(Wgit::Url) ? obj : new(obj)
|
91
91
|
end
|
92
92
|
|
93
|
+
# Returns a Wgit::Url instance from Wgit::Url.parse, or nil if obj cannot
|
94
|
+
# be parsed successfully e.g. the String is invalid.
|
95
|
+
#
|
96
|
+
# Use this method when you can't guarantee that obj is parsable as a URL.
|
97
|
+
# See Wgit::Url.parse for more information.
|
98
|
+
#
|
99
|
+
# @param obj [Object] The object to parse, which #is_a?(String).
|
100
|
+
# @raise [StandardError] If obj.is_a?(String) is false.
|
101
|
+
# @return [Wgit::Url, nil] A Wgit::Url instance or nil (if obj is invalid).
|
102
|
+
def self.parse?(obj)
|
103
|
+
parse(obj)
|
104
|
+
rescue Addressable::URI::InvalidURIError
|
105
|
+
Wgit.logger.debug("Wgit::Url.parse?('#{obj}') exception: \
|
106
|
+
Addressable::URI::InvalidURIError")
|
107
|
+
nil
|
108
|
+
end
|
109
|
+
|
93
110
|
# Sets the @crawled instance var, also setting @date_crawled for
|
94
111
|
# convenience.
|
95
112
|
#
|
@@ -98,8 +115,6 @@ module Wgit
|
|
98
115
|
def crawled=(bool)
|
99
116
|
@crawled = bool
|
100
117
|
@date_crawled = bool ? Wgit::Utils.time_stamp : nil
|
101
|
-
|
102
|
-
bool
|
103
118
|
end
|
104
119
|
|
105
120
|
# Overrides String#replace setting the new_url @uri and String value.
|
@@ -114,33 +129,40 @@ module Wgit
|
|
114
129
|
|
115
130
|
# Returns true if self is a relative Url; false if absolute.
|
116
131
|
#
|
117
|
-
#
|
118
|
-
# 'http://', otherwise the
|
132
|
+
# An absolute URL must have a scheme prefix e.g.
|
133
|
+
# 'http://', otherwise the URL is regarded as being relative (regardless
|
119
134
|
# of whether it's valid or not). The only exception is if an opts arg is
|
120
135
|
# provided and self is a page belonging to that arg type e.g. host; then
|
121
136
|
# the link is relative.
|
122
137
|
#
|
138
|
+
# @example
|
139
|
+
# url = Wgit::Url.new('http://example.com/about')
|
140
|
+
#
|
141
|
+
# url.relative? # => false
|
142
|
+
# url.relative?(host: 'http://example.com') # => true
|
143
|
+
#
|
123
144
|
# @param opts [Hash] The options with which to check relativity. Only one
|
124
145
|
# opts param should be provided. The provided opts param Url must be
|
125
146
|
# absolute and be prefixed with a scheme. Consider using the output of
|
126
|
-
# Wgit::Url#
|
127
|
-
# @option opts [Wgit::Url, String] :
|
128
|
-
# http://www.google.com/how which gives a
|
129
|
-
# 'http://www.google.com'.
|
147
|
+
# Wgit::Url#to_origin which should work (unless it's nil).
|
148
|
+
# @option opts [Wgit::Url, String] :origin The Url origin e.g.
|
149
|
+
# http://www.google.com:81/how which gives an origin of
|
150
|
+
# 'http://www.google.com:81'.
|
130
151
|
# @option opts [Wgit::Url, String] :host The Url host e.g.
|
131
152
|
# http://www.google.com/how which gives a host of 'www.google.com'.
|
132
153
|
# @option opts [Wgit::Url, String] :domain The Url domain e.g.
|
133
154
|
# http://www.google.com/how which gives a domain of 'google.com'.
|
134
155
|
# @option opts [Wgit::Url, String] :brand The Url brand e.g.
|
135
156
|
# http://www.google.com/how which gives a brand of 'google'.
|
136
|
-
# @raise [StandardError] If self is invalid e.g. empty or an invalid opts
|
157
|
+
# @raise [StandardError] If self is invalid (e.g. empty) or an invalid opts
|
137
158
|
# param has been provided.
|
138
159
|
# @return [Boolean] True if relative, false if absolute.
|
139
160
|
def relative?(opts = {})
|
140
|
-
defaults = {
|
161
|
+
defaults = { origin: nil, host: nil, domain: nil, brand: nil }
|
141
162
|
opts = defaults.merge(opts)
|
142
163
|
raise 'Url (self) cannot be empty' if empty?
|
143
164
|
|
165
|
+
return false if scheme_relative?
|
144
166
|
return true if @uri.relative?
|
145
167
|
|
146
168
|
# Self is absolute but may be relative to the opts param e.g. host.
|
@@ -151,14 +173,14 @@ module Wgit
|
|
151
173
|
|
152
174
|
type, url = opts.first
|
153
175
|
url = Wgit::Url.new(url)
|
154
|
-
|
155
|
-
raise "Invalid opts param value,
|
156
|
-
protocol scheme: #{url}"
|
176
|
+
if url.invalid?
|
177
|
+
raise "Invalid opts param value, it must be absolute, containing a \
|
178
|
+
protocol scheme and domain (e.g. http://example.com): #{url}"
|
157
179
|
end
|
158
180
|
|
159
181
|
case type
|
160
|
-
when :
|
161
|
-
|
182
|
+
when :origin # http://www.google.com:81
|
183
|
+
to_origin == url.to_origin
|
162
184
|
when :host # www.google.com
|
163
185
|
to_host == url.to_host
|
164
186
|
when :domain # google.com
|
@@ -177,18 +199,20 @@ protocol scheme: #{url}"
|
|
177
199
|
@uri.absolute?
|
178
200
|
end
|
179
201
|
|
180
|
-
# Returns if self is a valid and absolute HTTP
|
202
|
+
# Returns if self is a valid and absolute HTTP URL or not. Self should
|
203
|
+
# always be crawlable if this method returns true.
|
181
204
|
#
|
182
|
-
# @return [Boolean] True if valid and
|
205
|
+
# @return [Boolean] True if valid, absolute and crawlable, otherwise false.
|
183
206
|
def valid?
|
184
207
|
return false if relative?
|
185
|
-
return false unless
|
186
|
-
return false
|
208
|
+
return false unless to_origin && to_domain
|
209
|
+
return false unless URI::DEFAULT_PARSER.make_regexp.match(normalize)
|
187
210
|
|
188
211
|
true
|
189
212
|
end
|
190
213
|
|
191
|
-
# Returns if self is an invalid (relative) HTTP
|
214
|
+
# Returns if self is an invalid (e.g. relative) HTTP URL. See
|
215
|
+
# Wgit::Url#valid? for the inverse (and more information).
|
192
216
|
#
|
193
217
|
# @return [Boolean] True if invalid, otherwise false.
|
194
218
|
def invalid?
|
@@ -213,7 +237,8 @@ protocol scheme: #{url}"
|
|
213
237
|
Wgit::Url.new(concatted)
|
214
238
|
end
|
215
239
|
|
216
|
-
#
|
240
|
+
# Normalizes/escapes self and returns a new Wgit::Url. Self isn't modified.
|
241
|
+
# This should be used before GET'ing the url, in case it has IRI chars.
|
217
242
|
#
|
218
243
|
# @return [Wgit::Url] An escaped version of self.
|
219
244
|
def normalize
|
@@ -224,43 +249,46 @@ protocol scheme: #{url}"
|
|
224
249
|
# modify the receiver.
|
225
250
|
#
|
226
251
|
# If self is absolute then it's returned as is, making this method
|
227
|
-
# idempotent. The doc's
|
228
|
-
# doc.url is used as the base; which is concatted with self.
|
252
|
+
# idempotent. The doc's `<base>` element is used if present, otherwise
|
253
|
+
# `doc.url` is used as the base; which is concatted with self.
|
229
254
|
#
|
230
|
-
# Typically used to build an absolute link obtained from a document
|
255
|
+
# Typically used to build an absolute link obtained from a document.
|
231
256
|
#
|
257
|
+
# @example
|
232
258
|
# link = Wgit::Url.new('/favicon.png')
|
233
259
|
# doc = Wgit::Document.new('http://example.com')
|
234
260
|
#
|
235
|
-
# link.
|
261
|
+
# link.make_absolute(doc) # => "http://example.com/favicon.png"
|
236
262
|
#
|
237
263
|
# @param doc [Wgit::Document] The doc whose base Url is concatted with
|
238
264
|
# self.
|
239
265
|
# @raise [StandardError] If doc isn't a Wgit::Document or if `doc.base_url`
|
240
266
|
# raises an Exception.
|
241
267
|
# @return [Wgit::Url] Self in absolute form.
|
242
|
-
def
|
268
|
+
def make_absolute(doc)
|
243
269
|
assert_type(doc, Wgit::Document)
|
270
|
+
raise 'Cannot make absolute when Document @url is not valid' \
|
271
|
+
unless doc.url.valid?
|
272
|
+
|
273
|
+
return prefix_scheme(doc.url.to_scheme&.to_sym) if scheme_relative?
|
244
274
|
|
245
275
|
absolute? ? self : doc.base_url(link: self).concat(self)
|
246
276
|
end
|
247
277
|
|
248
|
-
# Returns self having prefixed a protocol
|
278
|
+
# Returns self having prefixed a scheme/protocol. Doesn't modify receiver.
|
249
279
|
# Returns self even if absolute (with scheme); therefore is idempotent.
|
250
280
|
#
|
251
|
-
# @param
|
252
|
-
# @return [Wgit::Url] Self with a
|
253
|
-
def prefix_scheme(
|
254
|
-
|
255
|
-
|
256
|
-
case protocol
|
257
|
-
when :http
|
258
|
-
Wgit::Url.new("http://#{url}")
|
259
|
-
when :https
|
260
|
-
Wgit::Url.new("https://#{url}")
|
261
|
-
else
|
262
|
-
raise "protocol must be :http or :https, not :#{protocol}"
|
281
|
+
# @param scheme [Symbol] Either :http or :https.
|
282
|
+
# @return [Wgit::Url] Self with a scheme prefix.
|
283
|
+
def prefix_scheme(scheme = :http)
|
284
|
+
unless %i[http https].include?(scheme)
|
285
|
+
raise "scheme must be :http or :https, not :#{scheme}"
|
263
286
|
end
|
287
|
+
|
288
|
+
return self if absolute? && !scheme_relative?
|
289
|
+
|
290
|
+
separator = scheme_relative? ? '' : '//'
|
291
|
+
Wgit::Url.new("#{scheme}:#{separator}#{self}")
|
264
292
|
end
|
265
293
|
|
266
294
|
# Returns a Hash containing this Url's instance vars excluding @uri.
|
@@ -268,8 +296,7 @@ protocol scheme: #{url}"
|
|
268
296
|
#
|
269
297
|
# @return [Hash] self's instance vars as a Hash.
|
270
298
|
def to_h
|
271
|
-
|
272
|
-
h = Wgit::Utils.to_h(self, ignore: ignore)
|
299
|
+
h = Wgit::Utils.to_h(self, ignore: ['@uri'])
|
273
300
|
Hash[h.to_a.insert(0, ['url', self])] # Insert url at position 0.
|
274
301
|
end
|
275
302
|
|
@@ -312,6 +339,20 @@ protocol scheme: #{url}"
|
|
312
339
|
host ? Wgit::Url.new(host) : nil
|
313
340
|
end
|
314
341
|
|
342
|
+
# Returns a new Wgit::Url containing just the port of this URL e.g.
|
343
|
+
# Given http://www.google.co.uk:443/about.html, '443' is returned.
|
344
|
+
#
|
345
|
+
# @return [Wgit::Url, nil] Containing just the port or nil.
|
346
|
+
def to_port
|
347
|
+
port = @uri.port
|
348
|
+
|
349
|
+
# @uri.port defaults port to 80/443 if missing, so we check for :#{port}.
|
350
|
+
return nil unless port
|
351
|
+
return nil unless include?(":#{port}")
|
352
|
+
|
353
|
+
Wgit::Url.new(port.to_s)
|
354
|
+
end
|
355
|
+
|
315
356
|
# Returns a new Wgit::Url containing just the domain of this URL e.g.
|
316
357
|
# Given http://www.google.co.uk/about.html, google.co.uk is returned.
|
317
358
|
#
|
@@ -321,6 +362,20 @@ protocol scheme: #{url}"
|
|
321
362
|
domain ? Wgit::Url.new(domain) : nil
|
322
363
|
end
|
323
364
|
|
365
|
+
# Returns a new Wgit::Url containing just the sub domain of this URL e.g.
|
366
|
+
# Given http://scripts.dev.google.com, scripts.dev is returned.
|
367
|
+
#
|
368
|
+
# @return [Wgit::Url, nil] Containing just the sub domain or nil.
|
369
|
+
def to_sub_domain
|
370
|
+
return nil unless to_host
|
371
|
+
|
372
|
+
dot_domain = ".#{to_domain}"
|
373
|
+
return nil unless include?(dot_domain)
|
374
|
+
|
375
|
+
sub_domain = to_host.sub(dot_domain, '')
|
376
|
+
Wgit::Url.new(sub_domain)
|
377
|
+
end
|
378
|
+
|
324
379
|
# Returns a new Wgit::Url containing just the brand of this URL e.g.
|
325
380
|
# Given http://www.google.co.uk/about.html, google is returned.
|
326
381
|
#
|
@@ -336,12 +391,24 @@ protocol scheme: #{url}"
|
|
336
391
|
# @return [Wgit::Url, nil] The base of self e.g. http://www.google.co.uk or
|
337
392
|
# nil.
|
338
393
|
def to_base
|
339
|
-
return nil
|
394
|
+
return nil unless @uri.scheme && @uri.host
|
340
395
|
|
341
396
|
base = "#{@uri.scheme}://#{@uri.host}"
|
342
397
|
Wgit::Url.new(base)
|
343
398
|
end
|
344
399
|
|
400
|
+
# Returns only the origin of this URL e.g. the protocol scheme, host and
|
401
|
+
# port combined. For http://localhost:3000/api, http://localhost:3000 gets
|
402
|
+
# returned. If there's no port present, then to_base is returned.
|
403
|
+
#
|
404
|
+
# @return [Wgit::Url, nil] The origin of self or nil.
|
405
|
+
def to_origin
|
406
|
+
return nil unless to_base
|
407
|
+
return to_base unless to_port
|
408
|
+
|
409
|
+
Wgit::Url.new("#{to_base}:#{to_port}")
|
410
|
+
end
|
411
|
+
|
345
412
|
# Returns the path of this URL e.g. the bit after the host without slashes.
|
346
413
|
# For example:
|
347
414
|
# Wgit::Url.new("http://www.google.co.uk/about.html/").to_path returns
|
@@ -370,7 +437,7 @@ protocol scheme: #{url}"
|
|
370
437
|
end
|
371
438
|
|
372
439
|
# Returns a new Wgit::Url containing just the query string of this URL
|
373
|
-
# e.g. Given http://google.com?q=
|
440
|
+
# e.g. Given http://google.com?q=foo&bar=1, 'q=foo&bar=1' is returned.
|
374
441
|
#
|
375
442
|
# @return [Wgit::Url, nil] Containing just the query string or nil.
|
376
443
|
def to_query
|
@@ -378,6 +445,24 @@ protocol scheme: #{url}"
|
|
378
445
|
query ? Wgit::Url.new(query) : nil
|
379
446
|
end
|
380
447
|
|
448
|
+
# Returns a Hash containing just the query string parameters of this URL
|
449
|
+
# e.g. Given http://google.com?q=ruby, "{ 'q' => 'ruby' }" is returned.
|
450
|
+
#
|
451
|
+
# @param symbolize_keys [Boolean] The returned Hash keys will be Symbols if
|
452
|
+
# true, Strings otherwise.
|
453
|
+
# @return [Hash<String | Symbol, String>] Containing the query string
|
454
|
+
# params or empty if the URL doesn't contain any query parameters.
|
455
|
+
def to_query_hash(symbolize_keys: false)
|
456
|
+
query_str = to_query
|
457
|
+
return {} unless query_str
|
458
|
+
|
459
|
+
query_str.split('&').each_with_object({}) do |param, hash|
|
460
|
+
k, v = param.split('=')
|
461
|
+
k = k.to_sym if symbolize_keys
|
462
|
+
hash[k] = v
|
463
|
+
end
|
464
|
+
end
|
465
|
+
|
381
466
|
# Returns a new Wgit::Url containing just the fragment string of this URL
|
382
467
|
# e.g. Given http://google.com#about, #about is returned.
|
383
468
|
#
|
@@ -399,6 +484,24 @@ protocol scheme: #{url}"
|
|
399
484
|
segs.length > 1 ? Wgit::Url.new(segs.last) : nil
|
400
485
|
end
|
401
486
|
|
487
|
+
# Returns a new Wgit::Url containing just the username string of this URL
|
488
|
+
# e.g. Given http://me:pass1@example.com, me is returned.
|
489
|
+
#
|
490
|
+
# @return [Wgit::Url, nil] Containing just the user string or nil.
|
491
|
+
def to_user
|
492
|
+
user = @uri.user
|
493
|
+
user ? Wgit::Url.new(user) : nil
|
494
|
+
end
|
495
|
+
|
496
|
+
# Returns a new Wgit::Url containing just the password string of this URL
|
497
|
+
# e.g. Given http://me:pass1@example.com, pass1 is returned.
|
498
|
+
#
|
499
|
+
# @return [Wgit::Url, nil] Containing just the password string or nil.
|
500
|
+
def to_password
|
501
|
+
password = @uri.password
|
502
|
+
password ? Wgit::Url.new(password) : nil
|
503
|
+
end
|
504
|
+
|
402
505
|
# Omits the given URL components from self and returns a new Wgit::Url.
|
403
506
|
#
|
404
507
|
# Calls Addressable::URI#omit underneath and creates a new Wgit::Url from
|
@@ -442,7 +545,7 @@ protocol scheme: #{url}"
|
|
442
545
|
.omit_trailing_slash
|
443
546
|
end
|
444
547
|
|
445
|
-
# Returns a new Wgit::Url with the base (
|
548
|
+
# Returns a new Wgit::Url with the base (scheme and host) removed e.g. Given
|
446
549
|
# http://google.com/search?q=something#about, search?q=something#about is
|
447
550
|
# returned. If relative and base isn't present then self is returned.
|
448
551
|
# Leading and trailing slashes are always stripped from the return value.
|
@@ -457,6 +560,21 @@ protocol scheme: #{url}"
|
|
457
560
|
Wgit::Url.new(omit_base).omit_slashes
|
458
561
|
end
|
459
562
|
|
563
|
+
# Returns a new Wgit::Url with the origin (base + port) removed e.g. Given
|
564
|
+
# http://google.com:81/search?q=something#about, search?q=something#about is
|
565
|
+
# returned. If relative and base isn't present then self is returned.
|
566
|
+
# Leading and trailing slashes are always stripped from the return value.
|
567
|
+
#
|
568
|
+
# @return [Wgit::Url] Self containing everything after the origin.
|
569
|
+
def omit_origin
|
570
|
+
origin = to_origin
|
571
|
+
omit_origin = origin ? gsub(origin, '') : self
|
572
|
+
|
573
|
+
return self if ['', '/'].include?(omit_origin)
|
574
|
+
|
575
|
+
Wgit::Url.new(omit_origin).omit_slashes
|
576
|
+
end
|
577
|
+
|
460
578
|
# Returns a new Wgit::Url with the query string portion removed e.g. Given
|
461
579
|
# http://google.com/search?q=hello, http://google.com/search is
|
462
580
|
# returned. Self is returned as is if no query string is present. A URL
|
@@ -502,25 +620,47 @@ protocol scheme: #{url}"
|
|
502
620
|
start_with?('#')
|
503
621
|
end
|
504
622
|
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
alias
|
521
|
-
alias
|
522
|
-
alias
|
523
|
-
alias
|
524
|
-
alias
|
623
|
+
# Returns true if self equals '/' a.k.a. index.
|
624
|
+
#
|
625
|
+
# @return [Boolean] True if self equals '/', false otherwise.
|
626
|
+
def index?
|
627
|
+
self == '/'
|
628
|
+
end
|
629
|
+
|
630
|
+
# Returns true if self starts with '//' a.k.a. a scheme/protocol relative
|
631
|
+
# path.
|
632
|
+
#
|
633
|
+
# @return [Boolean] True if self starts with '//', false otherwise.
|
634
|
+
def scheme_relative?
|
635
|
+
start_with?('//')
|
636
|
+
end
|
637
|
+
|
638
|
+
alias + concat
|
639
|
+
alias crawled? crawled
|
640
|
+
alias is_relative? relative?
|
641
|
+
alias is_absolute? absolute?
|
642
|
+
alias is_valid? valid?
|
643
|
+
alias is_query? query?
|
644
|
+
alias is_fragment? fragment?
|
645
|
+
alias is_index? index?
|
646
|
+
alias is_scheme_relative? scheme_relative?
|
647
|
+
alias uri to_uri
|
648
|
+
alias url to_url
|
649
|
+
alias scheme to_scheme
|
650
|
+
alias host to_host
|
651
|
+
alias port to_port
|
652
|
+
alias domain to_domain
|
653
|
+
alias brand to_brand
|
654
|
+
alias base to_base
|
655
|
+
alias origin to_origin
|
656
|
+
alias path to_path
|
657
|
+
alias endpoint to_endpoint
|
658
|
+
alias query to_query
|
659
|
+
alias query_hash to_query_hash
|
660
|
+
alias fragment to_fragment
|
661
|
+
alias extension to_extension
|
662
|
+
alias user to_user
|
663
|
+
alias password to_password
|
664
|
+
alias sub_domain to_sub_domain
|
525
665
|
end
|
526
666
|
end
|