wgit 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -55,7 +55,7 @@ end
55
55
  # Text.
56
56
  Wgit::Document.define_extension(
57
57
  :text,
58
- proc { Wgit::Document.text_elements_xpath },
58
+ Wgit::Document::TEXT_ELEMENTS_XPATH,
59
59
  singleton: false,
60
60
  text_content_only: true
61
61
  )
@@ -56,11 +56,11 @@ module Wgit
56
56
  @body.empty? ? nil : @body
57
57
  end
58
58
 
59
- # Returns true if the response isn't a #success? or a #redirect?
59
+ # Returns whether or not a server response is absent.
60
60
  #
61
- # @return [Boolean] True if failed, false otherwise.
61
+ # @return [Boolean] True if the status is nil or < 1, false otherwise.
62
62
  def failure?
63
- !success? && !redirect?
63
+ !success?
64
64
  end
65
65
 
66
66
  # Sets the headers Hash to the given value. The header keys are mapped
@@ -122,13 +122,13 @@ module Wgit
122
122
  @status = int.positive? ? int : nil
123
123
  end
124
124
 
125
- # Returns whether or not the response is a 2xx Success.
125
+ # Returns whether or not a server response is present.
126
126
  #
127
- # @return [Boolean] True if 2xx Success, false otherwise.
127
+ # @return [Boolean] True if the status is > 0, false otherwise.
128
128
  def success?
129
129
  return false unless @status
130
130
 
131
- @status.between?(200, 299)
131
+ @status.positive?
132
132
  end
133
133
 
134
134
  alias code status
@@ -19,7 +19,7 @@ module Wgit
19
19
  include Assertable
20
20
 
21
21
  # Whether or not the Url has been crawled. A custom crawled= method
22
- # is provided by this class, overridding the default one.
22
+ # is provided by this class.
23
23
  attr_reader :crawled
24
24
 
25
25
  # The Time stamp of when this Url was crawled.
@@ -31,7 +31,7 @@ module Wgit
31
31
  # Initializes a new instance of Wgit::Url which represents a web based
32
32
  # HTTP URL.
33
33
  #
34
- # @param url_or_obj [String, Wgit::Url, Object#fetch#[]] Is either a String
34
+ # @param url_or_obj [String, Wgit::Url, #fetch#[]] Is either a String
35
35
  # based URL or an object representing a Database record e.g. a MongoDB
36
36
  # document/object.
37
37
  # @param crawled [Boolean] Whether or not the HTML of the URL's web page
@@ -114,16 +114,22 @@ module Wgit
114
114
 
115
115
  # Returns true if self is a relative Url; false if absolute.
116
116
  #
117
- # All external links in a page are expected to have a scheme prefix e.g.
118
- # 'http://', otherwise the link is treated as an internal link (regardless
117
+ # An absolute URL must have a scheme prefix e.g.
118
+ # 'http://', otherwise the URL is regarded as being relative (regardless
119
119
  # of whether it's valid or not). The only exception is if an opts arg is
120
120
  # provided and self is a page belonging to that arg type e.g. host; then
121
121
  # the link is relative.
122
122
  #
123
+ # @example
124
+ # url = Wgit::Url.new('http://example.com/about')
125
+ #
126
+ # url.relative? # => false
127
+ # url.relative?(host: 'http://example.com') # => true
128
+ #
123
129
  # @param opts [Hash] The options with which to check relativity. Only one
124
130
  # opts param should be provided. The provided opts param Url must be
125
131
  # absolute and be prefixed with a scheme. Consider using the output of
126
- # Wgit::Url#to_base which should work unless it's nil.
132
+ # Wgit::Url#to_base which should work (unless it's nil).
127
133
  # @option opts [Wgit::Url, String] :base The Url base e.g.
128
134
  # http://www.google.com/how which gives a base of
129
135
  # 'http://www.google.com'.
@@ -133,7 +139,7 @@ module Wgit
133
139
  # http://www.google.com/how which gives a domain of 'google.com'.
134
140
  # @option opts [Wgit::Url, String] :brand The Url brand e.g.
135
141
  # http://www.google.com/how which gives a brand of 'google'.
136
- # @raise [StandardError] If self is invalid e.g. empty or an invalid opts
142
+ # @raise [StandardError] If self is invalid (e.g. empty) or an invalid opts
137
143
  # param has been provided.
138
144
  # @return [Boolean] True if relative, false if absolute.
139
145
  def relative?(opts = {})
@@ -151,9 +157,9 @@ module Wgit
151
157
 
152
158
  type, url = opts.first
153
159
  url = Wgit::Url.new(url)
154
- unless url.to_base
155
- raise "Invalid opts param value, Url must be absolute and contain \
156
- protocol scheme: #{url}"
160
+ if url.invalid?
161
+ raise "Invalid opts param value, it must be absolute, containing a \
162
+ protocol scheme and domain (e.g. http://example.com): #{url}"
157
163
  end
158
164
 
159
165
  case type
@@ -177,18 +183,20 @@ protocol scheme: #{url}"
177
183
  @uri.absolute?
178
184
  end
179
185
 
180
- # Returns if self is a valid and absolute HTTP Url or not.
186
+ # Returns if self is a valid and absolute HTTP URL or not. Self should
187
+ # always be crawlable if this method returns true.
181
188
  #
182
- # @return [Boolean] True if valid and absolute, otherwise false.
189
+ # @return [Boolean] True if valid, absolute and crawlable, otherwise false.
183
190
  def valid?
184
191
  return false if relative?
185
- return false unless start_with?('http://') || start_with?('https://')
192
+ return false unless to_base && to_domain
186
193
  return false if URI::DEFAULT_PARSER.make_regexp.match(normalize).nil?
187
194
 
188
195
  true
189
196
  end
190
197
 
191
- # Returns if self is an invalid (relative) HTTP Url or not.
198
+ # Returns if self is an invalid (e.g. relative) HTTP URL. See
199
+ # Wgit::Url#valid? for the inverse (and more information).
192
200
  #
193
201
  # @return [Boolean] True if invalid, otherwise false.
194
202
  def invalid?
@@ -227,8 +235,9 @@ protocol scheme: #{url}"
227
235
  # idempotent. The doc's <base> element is used if present, otherwise
228
236
  # doc.url is used as the base; which is concatted with self.
229
237
  #
230
- # Typically used to build an absolute link obtained from a document e.g.
238
+ # Typically used to build an absolute link obtained from a document.
231
239
  #
240
+ # @example
232
241
  # link = Wgit::Url.new('/favicon.png')
233
242
  # doc = Wgit::Document.new('http://example.com')
234
243
  #
@@ -188,9 +188,9 @@ module Wgit
188
188
  #
189
189
  # @param arr [Enumerable] The Array to process. arr is modified.
190
190
  # @return [Enumerable] The processed arr is both modified and then returned.
191
- def self.process_arr(arr)
191
+ def self.process_arr(arr, encode: true)
192
192
  if arr.is_a?(Array)
193
- arr.map! { |str| process_str(str) }
193
+ arr.map! { |str| process_str(str, encode: encode) }
194
194
  arr.reject! { |str| str.is_a?(String) ? str.empty? : false }
195
195
  arr.compact!
196
196
  arr.uniq!
@@ -5,7 +5,7 @@
5
5
  # @author Michael Telford
6
6
  module Wgit
7
7
  # The current gem version of Wgit.
8
- VERSION = '0.5.1'
8
+ VERSION = '0.6.0'
9
9
 
10
10
  # Returns the current gem version of Wgit as a String.
11
11
  def self.version
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wgit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Telford
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-22 00:00:00.000000000 Z
11
+ date: 2019-12-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -213,6 +213,12 @@ files:
213
213
  - "./lib/wgit/url.rb"
214
214
  - "./lib/wgit/utils.rb"
215
215
  - "./lib/wgit/version.rb"
216
+ - ".yardopts"
217
+ - CHANGELOG.md
218
+ - CODE_OF_CONDUCT.md
219
+ - CONTRIBUTING.md
220
+ - LICENSE.txt
221
+ - README.md
216
222
  homepage: https://github.com/michaeltelford/wgit
217
223
  licenses:
218
224
  - MIT
@@ -221,7 +227,7 @@ metadata:
221
227
  source_code_uri: https://github.com/michaeltelford/wgit
222
228
  changelog_uri: https://github.com/michaeltelford/wgit/blob/master/CHANGELOG.md
223
229
  bug_tracker_uri: https://github.com/michaeltelford/wgit/issues
224
- documentation_uri: https://www.rubydoc.info/gems/wgit
230
+ documentation_uri: https://www.rubydoc.info/github/michaeltelford/wgit/master
225
231
  allowed_push_host: https://rubygems.org
226
232
  post_install_message:
227
233
  rdoc_options: []
@@ -238,8 +244,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
238
244
  - !ruby/object:Gem::Version
239
245
  version: '0'
240
246
  requirements: []
241
- rubyforge_project:
242
- rubygems_version: 2.7.6
247
+ rubygems_version: 3.0.6
243
248
  signing_key:
244
249
  specification_version: 4
245
250
  summary: Wgit is a Ruby gem similar in nature to GNU's `wget` tool. It provides an