wgit 0.0.17 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative '../assertable'
2
4
 
3
5
  module Wgit
@@ -9,7 +11,7 @@ module Wgit
9
11
  CONNECTION_DETAILS = {}
10
12
 
11
13
  # The keys required for a successful database connection.
12
- CONNECTION_KEYS_REQUIRED = ['DB_CONNECTION_STRING']
14
+ CONNECTION_KEYS_REQUIRED = ['DB_CONNECTION_STRING'].freeze
13
15
 
14
16
  # Set the database's connection details from the given hash. It is your
15
17
  # responsibility to ensure the correct hash vars are present and set.
@@ -34,6 +36,6 @@ module Wgit
34
36
  # @raise [KeyError] If any of the required connection details are missing.
35
37
  # @return [Hash] Containing the database connection details from the ENV.
36
38
  def self.set_connection_details_from_env
37
- self.set_connection_details(ENV)
39
+ set_connection_details(ENV)
38
40
  end
39
41
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative '../document'
2
4
  require_relative '../url'
3
5
  require_relative '../utils'
@@ -7,7 +9,6 @@ require 'logger'
7
9
  require 'mongo'
8
10
 
9
11
  module Wgit
10
-
11
12
  # Class modeling a DB connection and CRUD operations for the Url and
12
13
  # Document collections.
13
14
  class Database
@@ -73,7 +74,7 @@ module Wgit
73
74
  # @yield [url] Given each Url returned from the DB.
74
75
  # @return [Array<Wgit::Url>] The Urls obtained from the DB.
75
76
  def urls(crawled = nil, limit = 0, skip = 0)
76
- crawled.nil? ? query = {} : query = { crawled: crawled }
77
+ query = crawled.nil? ? {} : { crawled: crawled }
77
78
 
78
79
  sort = { date_added: 1 }
79
80
  results = retrieve(:urls, query, sort, {}, limit, skip)
@@ -129,12 +130,12 @@ module Wgit
129
130
  # @return [Array<Wgit::Document>] The search results obtained from the DB.
130
131
  def search(query, whole_sentence = false, limit = 10, skip = 0)
131
132
  query.strip!
132
- query.replace("\"" + query + "\"") if whole_sentence
133
+ query.replace('"' + query + '"') if whole_sentence
133
134
 
134
135
  # The sort_proj sorts based on the most search hits.
135
136
  # We use the sort_proj hash as both a sort and a projection below.
136
137
  # :$caseSensitive => case_sensitive, 3.2+ only.
137
- sort_proj = { score: { :$meta => "textScore" } }
138
+ sort_proj = { score: { :$meta => 'textScore' } }
138
139
  query = { :$text => { :$search => query } }
139
140
 
140
141
  results = retrieve(:documents, query, sort_proj, sort_proj, limit, skip)
@@ -188,8 +189,8 @@ module Wgit
188
189
  # @param url [Wgit::Url] The Url to search the DB for.
189
190
  # @return [Boolean] True if url exists, otherwise false.
190
191
  def url?(url)
191
- h = { "url" => url }
192
- not @@client[:urls].find(h).none?
192
+ h = { 'url' => url }
193
+ @@client[:urls].find(h).any?
193
194
  end
194
195
 
195
196
  # Returns whether or not a record with the given doc.url (which is unique)
@@ -199,8 +200,8 @@ module Wgit
199
200
  # @return [Boolean] True if doc exists, otherwise false.
200
201
  def doc?(doc)
201
202
  url = doc.respond_to?(:url) ? doc.url : doc
202
- h = { "url" => url }
203
- not @@client[:documents].find(h).none?
203
+ h = { 'url' => url }
204
+ @@client[:documents].find(h).any?
204
205
  end
205
206
 
206
207
  ### Update Data ###
@@ -220,19 +221,25 @@ module Wgit
220
221
  end
221
222
  end
222
223
 
223
- private
224
+ protected
224
225
 
225
226
  # Return if the write to the DB succeeded or not.
227
+ #
228
+ # @param result [Mongo::Object] The operation result.
229
+ # @param count [Integer] The number of records written to.
230
+ # @param multi [Boolean] True if more than one record is being written to.
231
+ # @raise [RuntimeError] If result.class isn't supported.
232
+ # @return [Boolean] True if the write was successful.
226
233
  def write_succeeded?(result, count = 1, multi = false)
227
234
  case result.class.to_s
228
235
  # Single create result.
229
- when "Mongo::Operation::Insert::Result"
236
+ when 'Mongo::Operation::Insert::Result'
230
237
  result.documents.first[:err].nil?
231
238
  # Multiple create result.
232
- when "Mongo::BulkWrite::Result"
239
+ when 'Mongo::BulkWrite::Result'
233
240
  result.inserted_count == count
234
241
  # Single and multiple update result.
235
- when "Mongo::Operation::Update::Result"
242
+ when 'Mongo::Operation::Update::Result'
236
243
  if multi
237
244
  result.n == count
238
245
  else
@@ -240,50 +247,65 @@ module Wgit
240
247
  end
241
248
  # Class no longer used, have you upgraded the 'mongo' gem?
242
249
  else
243
- raise "Result class not currently supported: #{result.class.to_s}"
250
+ raise "Result class not currently supported: #{result.class}"
244
251
  end
245
252
  end
246
253
 
247
254
  # Insert one or more Url objects into the DB.
255
+ #
256
+ # @param url_or_urls [Wgit::Url, Array<Wgit::Url>] The Url or Url's to
257
+ # insert.
258
+ # @raise [RuntimeError] If url_or_urls isn't of the correct type.
259
+ # @return [Integer] The number of inserted Url's.
248
260
  def insert_urls(url_or_urls)
249
- unless url_or_urls.respond_to?(:map)
250
- assert_type(url_or_urls, Url)
251
- url_or_urls = Wgit::Model.url(url_or_urls)
252
- else
261
+ if url_or_urls.respond_to?(:map)
253
262
  assert_arr_types(url_or_urls, Url)
254
263
  url_or_urls = url_or_urls.map do |url|
255
264
  Wgit::Model.url(url)
256
265
  end
266
+ else
267
+ assert_type(url_or_urls, Url)
268
+ url_or_urls = Wgit::Model.url(url_or_urls)
257
269
  end
258
270
  create(:urls, url_or_urls)
259
271
  end
260
272
 
261
273
  # Insert one or more Document objects into the DB.
274
+ #
275
+ # @param doc_or_docs [Wgit::Document, Array<Wgit::Document>] The Document
276
+ # or Document's to insert.
277
+ # @raise [RuntimeError] If doc_or_docs isn't of the correct type.
278
+ # @return [Integer] The number of inserted Document's.
262
279
  def insert_docs(doc_or_docs)
263
- unless doc_or_docs.respond_to?(:map)
264
- assert_type(doc_or_docs, [Document, Hash])
265
- unless doc_or_docs.is_a?(Hash)
266
- doc_or_docs = Wgit::Model.document(doc_or_docs)
267
- end
268
- else
280
+ if doc_or_docs.respond_to?(:map)
269
281
  assert_arr_types(doc_or_docs, [Document, Hash])
270
282
  doc_or_docs = doc_or_docs.map do |doc|
271
283
  Wgit::Model.document(doc) unless doc.is_a?(Hash)
272
284
  end
285
+ else
286
+ assert_type(doc_or_docs, [Document, Hash])
287
+ unless doc_or_docs.is_a?(Hash)
288
+ doc_or_docs = Wgit::Model.document(doc_or_docs)
289
+ end
273
290
  end
274
291
  create(:documents, doc_or_docs)
275
292
  end
276
293
 
277
294
  # Create/insert one or more Url or Document records into the DB.
295
+ #
296
+ # @param collection [Symbol] Either :urls or :documents.
297
+ # @param data [Hash, Array<Wgit::Url, Wgit::Document>] The data to insert.
298
+ # @raise [RuntimeError] If the data type is incorrect or if the write
299
+ # fails.
300
+ # @return [Integer] The number of inserted Objects.
278
301
  def create(collection, data)
279
302
  assert_type(data, [Hash, Array])
280
303
  # Single doc.
281
304
  if data.is_a?(Hash)
282
305
  data.merge!(Wgit::Model.common_insert_data)
283
306
  result = @@client[collection.to_sym].insert_one(data)
284
- unless write_succeeded?(result)
285
- raise "DB write (insert) failed"
286
- end
307
+ raise 'DB write (insert) failed' unless write_succeeded?(result)
308
+
287
309
  result.n
288
310
  # Multiple docs.
289
311
  elsif data.is_a?(Array)
@@ -292,9 +314,8 @@ module Wgit
292
314
  data_hash.merge(Wgit::Model.common_insert_data)
293
315
  end
294
316
  result = @@client[collection.to_sym].insert_many(data)
295
- unless write_succeeded?(result, data.length)
296
- raise "DB write(s) failed"
297
- end
317
+ raise 'DB write(s) failed' unless write_succeeded?(result, data.length)
318
+
298
319
  result.inserted_count
299
320
  else
300
321
  raise "data must be a Hash or an Array of Hash's"
@@ -302,52 +323,69 @@ module Wgit
302
323
  end
303
324
 
304
325
  # Retrieve Url or Document records from the DB.
326
+ #
327
+ # @param collection [Symbol] Either :urls or :documents.
328
+ # @param query [Hash] The query used during the retrieval.
329
+ # @param sort [Hash] The sort to use.
330
+ # @param projection [Hash] The projection to use.
331
+ # @param limit [Integer] The limit to use.
332
+ # @param skip [Integer] The skip to use.
333
+ # @return [Mongo::Object] The Mongo client find result.
305
334
  def retrieve(collection, query,
306
335
  sort = {}, projection = {},
307
336
  limit = 0, skip = 0)
308
337
  assert_type(query, Hash)
309
338
  @@client[collection.to_sym].find(query).projection(projection)
310
- .skip(skip).limit(limit).sort(sort)
339
+ .skip(skip).limit(limit).sort(sort)
311
340
  end
312
341
 
313
342
  # Update a Url object in the DB.
343
+ #
344
+ # @param url [Wgit::Url] The Url to update.
345
+ # @return [Integer] The number of updated records.
314
346
  def update_url(url)
315
347
  assert_type(url, Url)
316
348
  selection = { url: url }
317
349
  url_hash = Wgit::Model.url(url).merge(Wgit::Model.common_update_data)
318
- update = { "$set" => url_hash }
350
+ update = { '$set' => url_hash }
319
351
  _update(true, :urls, selection, update)
320
352
  end
321
353
 
322
354
  # Update a Document object in the DB.
355
+ #
356
+ # @param doc [Wgit::Document] The Document to update.
357
+ # @return [Integer] The number of updated records.
323
358
  def update_doc(doc)
324
359
  assert_type(doc, Document)
325
360
  selection = { url: doc.url }
326
361
  doc_hash = Wgit::Model.document(doc).merge(Wgit::Model.common_update_data)
327
- update = { "$set" => doc_hash }
362
+ update = { '$set' => doc_hash }
328
363
  _update(true, :documents, selection, update)
329
364
  end
330
365
 
366
+ private
367
+
331
368
  # Update one or more Url or Document records in the DB.
332
369
  # NOTE: The Model.common_update_data should be merged in the calling
333
- # method as the update param can be bespoke due to its nature.
370
+ # method as the update param can be bespoke, due to its nature.
334
371
  def _update(single, collection, selection, update)
335
372
  assert_arr_types([selection, update], Hash)
336
- if single
337
- result = @@client[collection.to_sym].update_one(selection, update)
338
- else
339
- result = @@client[collection.to_sym].update_many(selection, update)
340
- end
341
- raise "DB write (update) failed" unless write_succeeded?(result)
373
+ result = if single
374
+ @@client[collection.to_sym].update_one(selection, update)
375
+ else
376
+ @@client[collection.to_sym].update_many(selection, update)
377
+ end
378
+ raise 'DB write (update) failed' unless write_succeeded?(result)
379
+
342
380
  result.n
343
381
  end
344
382
 
345
- alias :count :size
346
- alias :length :size
347
- alias :num_documents :num_docs
348
- alias :document? :doc?
349
- alias :insert_url :insert_urls
350
- alias :insert_doc :insert_docs
351
- alias :num_objects :num_records
383
+ alias count size
384
+ alias length size
385
+ alias num_documents num_docs
386
+ alias document? doc?
387
+ alias insert_url insert_urls
388
+ alias insert_doc insert_docs
389
+ alias num_objects num_records
352
390
  end
353
391
  end
@@ -1,46 +1,48 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative '../utils'
2
4
 
3
5
  module Wgit
4
-
5
6
  # Module containing the database (DB) data model structure.
6
7
  module Model
7
-
8
8
  # The data model for a Wgit::Url.
9
9
  #
10
10
  # @param url [Wgit::Url] The URL DB record.
11
11
  # @return [Hash] The URL model ready for DB insertion.
12
12
  def self.url(url)
13
- raise "url must respond_to? to_h" unless url.respond_to?(:to_h)
13
+ raise 'url must respond_to? to_h' unless url.respond_to?(:to_h)
14
+
14
15
  model = url.to_h
15
16
  Wgit::Utils.remove_non_bson_types(model)
16
17
  end
17
-
18
+
18
19
  # The data model for a Wgit::Document.
19
20
  #
20
21
  # @param doc [Wgit::Document] The Document DB record.
21
22
  # @return [Hash] The Document model ready for DB insertion.
22
23
  def self.document(doc)
23
- raise "doc must respond_to? to_h" unless doc.respond_to?(:to_h)
24
+ raise 'doc must respond_to? to_h' unless doc.respond_to?(:to_h)
25
+
24
26
  model = doc.to_h(false)
25
27
  Wgit::Utils.remove_non_bson_types(model)
26
28
  end
27
-
29
+
28
30
  # Default fields when inserting a record into the DB.
29
31
  #
30
32
  # @return [Hash] Containing common insertion fields for all models.
31
33
  def self.common_insert_data
32
34
  {
33
- date_added: Wgit::Utils.time_stamp,
34
- date_modified: Wgit::Utils.time_stamp,
35
+ date_added: Wgit::Utils.time_stamp,
36
+ date_modified: Wgit::Utils.time_stamp
35
37
  }
36
38
  end
37
-
39
+
38
40
  # Default fields when updating a record in the DB.
39
41
  #
40
42
  # @return [Hash] Containing common update fields for all models.
41
43
  def self.common_update_data
42
44
  {
43
- date_modified: Wgit::Utils.time_stamp,
45
+ date_modified: Wgit::Utils.time_stamp
44
46
  }
45
47
  end
46
48
  end
data/lib/wgit/document.rb CHANGED
@@ -5,7 +5,6 @@ require 'nokogiri'
5
5
  require 'json'
6
6
 
7
7
  module Wgit
8
-
9
8
  # Class modeling a HTML web document. Also doubles as a search result when
10
9
  # loading Documents from the database.
11
10
  #
@@ -19,9 +18,9 @@ module Wgit
19
18
  # The HTML elements that make up the visible text on a page.
20
19
  # These elements are used to initialize the @text of the Document.
21
20
  # See the README.md for how to add to this Array dynamically.
22
- @text_elements = [
23
- :dd, :div, :dl, :dt, :figcaption, :figure, :hr, :li,
24
- :main, :ol, :p, :pre, :span, :ul, :h1, :h2, :h3, :h4, :h5
21
+ @text_elements = %i[
22
+ dd div dl dt figcaption figure hr li
23
+ main ol p pre span ul h1 h2 h3 h4 h5
25
24
  ]
26
25
 
27
26
  class << self
@@ -58,14 +57,14 @@ module Wgit
58
57
  # keys.
59
58
  # @param html [String] The crawled web page's HTML. This param is only
60
59
  # required if url_or_obj is a String representing the web page's URL.
61
- def initialize(url_or_obj, html = "")
60
+ def initialize(url_or_obj, html = '')
62
61
  # Init from URL String and HTML String.
63
62
  if url_or_obj.is_a?(String)
64
63
  url = url_or_obj
65
64
  assert_type(url, Wgit::Url)
66
65
 
67
66
  @url = url
68
- @html = html ||= ""
67
+ @html = html || ''
69
68
  @doc = init_nokogiri
70
69
  @score = 0.0
71
70
 
@@ -73,9 +72,9 @@ module Wgit
73
72
 
74
73
  # Dynamically run the init_*_from_html methods.
75
74
  Document.private_instance_methods(false).each do |method|
76
- if method.to_s.start_with?("init_") &&
77
- method.to_s.end_with?("_from_html")
78
- self.send(method)
75
+ if method.to_s.start_with?('init_') &&
76
+ method.to_s.end_with?('_from_html')
77
+ send(method)
79
78
  end
80
79
  end
81
80
  # Init from a Hash like object containing Strings as keys e.g. Mongo
@@ -84,18 +83,18 @@ module Wgit
84
83
  obj = url_or_obj
85
84
  assert_respond_to(obj, :fetch)
86
85
 
87
- @url = Wgit::Url.new(obj.fetch("url")) # Should always be present.
88
- @html = obj.fetch("html", "")
86
+ @url = Wgit::Url.new(obj.fetch('url')) # Should always be present.
87
+ @html = obj.fetch('html', '')
89
88
  @doc = init_nokogiri
90
- @score = obj.fetch("score", 0.0)
89
+ @score = obj.fetch('score', 0.0)
91
90
 
92
91
  process_url_and_html
93
92
 
94
93
  # Dynamically run the init_*_from_object methods.
95
94
  Document.private_instance_methods(false).each do |method|
96
- if method.to_s.start_with?("init_") &&
97
- method.to_s.end_with?("_from_object")
98
- self.send(method, obj)
95
+ if method.to_s.start_with?('init_') &&
96
+ method.to_s.end_with?('_from_object')
97
+ send(method, obj)
99
98
  end
100
99
  end
101
100
  end
@@ -108,7 +107,8 @@ module Wgit
108
107
  # @return [Boolean] True if @url and @html are equal, false if not.
109
108
  def ==(other_doc)
110
109
  return false unless other_doc.is_a? Wgit::Document
111
- @url == other_doc.url and @html == other_doc.html
110
+
111
+ (@url == other_doc.url) && (@html == other_doc.html)
112
112
  end
113
113
 
114
114
  # Is a shortcut for calling Document#html[range].
@@ -148,7 +148,7 @@ module Wgit
148
148
  assert_type(link, Wgit::Url)
149
149
  raise "link must be relative: #{link}" unless link.is_relative?
150
150
 
151
- if link.is_anchor? or link.is_query_string?
151
+ if link.is_anchor? || link.is_query_string?
152
152
  base_url = @base ? get_base.call : @url
153
153
  return base_url.without_anchor.without_query_string
154
154
  end
@@ -166,8 +166,8 @@ module Wgit
166
166
  # returned Hash.
167
167
  # @return [Hash] Containing self's instance vars.
168
168
  def to_h(include_html = false)
169
- ignore = include_html ? [] : ["@html"]
170
- ignore << "@doc" # Always ignore "@doc"
169
+ ignore = include_html ? [] : ['@html']
170
+ ignore << '@doc' # Always ignore "@doc"
171
171
  Wgit::Utils.to_h(self, ignore)
172
172
  end
173
173
 
@@ -200,8 +200,9 @@ module Wgit
200
200
  # Else take the var's #length method return value.
201
201
  else
202
202
  next unless instance_variable_get(var).respond_to?(:length)
203
+
203
204
  hash[var[1..-1].to_sym] =
204
- instance_variable_get(var).send(:length)
205
+ instance_variable_get(var).send(:length)
205
206
  end
206
207
  end
207
208
  hash
@@ -219,6 +220,7 @@ module Wgit
219
220
  # @return [Boolean] True if @html is nil/empty, false otherwise.
220
221
  def empty?
221
222
  return true if @html.nil?
223
+
222
224
  @html.empty?
223
225
  end
224
226
 
@@ -252,12 +254,12 @@ module Wgit
252
254
  def internal_links
253
255
  return [] if @links.empty?
254
256
 
255
- links = @links.
256
- reject { |link| !link.is_relative?(host: @url.to_base) }.
257
- map(&:without_base).
258
- map do |link| # We map @url.to_host into / because it's a duplicate.
259
- link.to_host == @url.to_host ? Wgit::Url.new('/') : link
260
- end
257
+ links = @links
258
+ .select { |link| link.is_relative?(host: @url.to_base) }
259
+ .map(&:without_base)
260
+ .map do |link| # We map @url.to_host into / because it's a duplicate.
261
+ link.to_host == @url.to_host ? Wgit::Url.new('/') : link
262
+ end
261
263
 
262
264
  Wgit::Utils.process_arr(links)
263
265
  end
@@ -271,6 +273,7 @@ module Wgit
271
273
  def internal_full_links
272
274
  links = internal_links
273
275
  return [] if links.empty?
276
+
274
277
  links.map { |link| base_url(link: link).concat(link) }
275
278
  end
276
279
 
@@ -281,9 +284,9 @@ module Wgit
281
284
  def external_links
282
285
  return [] if @links.empty?
283
286
 
284
- links = @links.
285
- reject { |link| link.relative_link?(host: @url.to_base) }.
286
- map(&:without_trailing_slash)
287
+ links = @links
288
+ .reject { |link| link.relative_link?(host: @url.to_base) }
289
+ .map(&:without_trailing_slash)
287
290
 
288
291
  Wgit::Utils.process_arr(links)
289
292
  end
@@ -304,24 +307,25 @@ module Wgit
304
307
  # sentence.
305
308
  # @return [Array<String>] Representing the search results.
306
309
  def search(query, sentence_limit = 80)
307
- raise "A search query must be provided" if query.empty?
308
- raise "The sentence_limit value must be even" if sentence_limit.odd?
310
+ raise 'A search query must be provided' if query.empty?
311
+ raise 'The sentence_limit value must be even' if sentence_limit.odd?
309
312
 
310
313
  results = {}
311
314
  regex = Regexp.new(query, Regexp::IGNORECASE)
312
315
 
313
316
  @text.each do |sentence|
314
317
  hits = sentence.scan(regex).count
315
- if hits > 0
316
- sentence.strip!
317
- index = sentence.index(regex)
318
- Wgit::Utils.format_sentence_length(sentence, index, sentence_limit)
319
- results[sentence] = hits
320
- end
318
+ next unless hits > 0
319
+
320
+ sentence.strip!
321
+ index = sentence.index(regex)
322
+ Wgit::Utils.format_sentence_length(sentence, index, sentence_limit)
323
+ results[sentence] = hits
321
324
  end
322
325
 
323
326
  return [] if results.empty?
324
- results = Hash[results.sort_by { |k, v| v }]
327
+
328
+ results = Hash[results.sort_by { |_k, v| v }]
325
329
  results.keys.reverse
326
330
  end
327
331
 
@@ -347,12 +351,13 @@ module Wgit
347
351
  #
348
352
  # @return [String] An xpath String to obtain a webpage's text elements.
349
353
  def self.text_elements_xpath
350
- xpath = ""
354
+ xpath = ''
351
355
  return xpath if Wgit::Document.text_elements.empty?
352
- el_xpath = "//%s/text()"
356
+
357
+ el_xpath = '//%s/text()'
353
358
  Wgit::Document.text_elements.each_with_index do |el, i|
354
- xpath += " | " unless i == 0
355
- xpath += el_xpath % [el]
359
+ xpath += ' | ' unless i == 0
360
+ xpath += format(el_xpath, el)
356
361
  end
357
362
  xpath
358
363
  end
@@ -429,35 +434,42 @@ module Wgit
429
434
  false
430
435
  end
431
436
 
432
- private
437
+ protected
433
438
 
434
- # Initializes the nokogiri object using @html, which must be already set.
439
+ # Initializes the nokogiri object using @html, which cannot be nil.
440
+ # Override this method to custom configure the Nokogiri object returned.
441
+ # Gets called from Wgit::Document.new.
442
+ #
443
+ # @return [Nokogiri::HTML] The initialised Nokogiri HTML object.
435
444
  def init_nokogiri
436
- raise "@html must be set" unless @html
445
+ raise '@html must be set' unless @html
446
+
437
447
  Nokogiri::HTML(@html) do |config|
438
448
  # TODO: Remove #'s below when crawling in production.
439
- #config.options = Nokogiri::XML::ParseOptions::STRICT |
449
+ # config.options = Nokogiri::XML::ParseOptions::STRICT |
440
450
  # Nokogiri::XML::ParseOptions::NONET
441
451
  end
442
452
  end
443
453
 
444
- # Ensure the @url and @html Strings are correctly encoded etc.
445
- def process_url_and_html
446
- @url = Wgit::Utils.process_str(@url)
447
- @html = Wgit::Utils.process_str(@html)
448
- end
449
-
450
- # Returns an object/value from this Document's @html using the provided
451
- # xpath param.
452
- # singleton ? results.first (single Object) : results (Array)
453
- # text_content_only ? result.content (String) : result (nokogiri Object)
454
- # A block can be used to set the final value before it is returned.
455
- # Return nil from the block if you don't want to override the value.
454
+ # Returns a value/object from this Document's @html using the given xpath
455
+ # parameter.
456
+ #
457
+ # @param xpath [String] Used to find the value/object in @html.
458
+ # @param singleton [Boolean] singleton ? results.first (single Nokogiri
459
+ # Object) : results (Array).
460
+ # @param text_content_only [Boolean] text_content_only ? result.content
461
+ # (String) : result (Nokogiri Object).
462
+ # @yield [String/Object, Symbol] Given the value before it's set as an
463
+ # instance variable so that you can inspect/alter the value if desired.
464
+ # Return nil from the block if you don't want to override the value. Also
465
+ # given the source which is always :html.
466
+ # @return [String, Object] The value found in the html or the default value
467
+ # (singleton ? nil : []).
456
468
  def find_in_html(xpath, singleton: true, text_content_only: true)
457
469
  xpath = xpath.call if xpath.respond_to?(:call)
458
470
  results = @doc.xpath(xpath)
459
471
 
460
- if results and not results.empty?
472
+ if results && !results.empty?
461
473
  result = if singleton
462
474
  text_content_only ? results.first.content : results.first
463
475
  else
@@ -477,10 +489,17 @@ module Wgit
477
489
  result
478
490
  end
479
491
 
480
- # Finds a value in the obj using the key.
481
- # singleton is used to set the value if not found in obj.
482
- # A block can be used to set the final value before it is returned.
483
- # Return nil from the block if you don't want to override the value.
492
+ # Returns a value from the obj using the given key via obj#fetch.
493
+ #
494
+ # @param obj [Object#fetch] The object containing the key/value.
495
+ # @param key [String] Used to find the value in the obj.
496
+ # @param singleton [Boolean] True if a single value, false otherwise.
497
+ # @yield [String/Object, Symbol] Given the value before it's set as an
498
+ # instance variable so that you can inspect/alter the value if desired.
499
+ # Return nil from the block if you don't want to override the value. Also
500
+ # given the source which is always :object.
501
+ # @return [String, Object] The value found in the obj or the default value
502
+ # (singleton ? nil : []).
484
503
  def find_in_object(obj, key, singleton: true)
485
504
  assert_respond_to(obj, :fetch)
486
505
 
@@ -496,14 +515,17 @@ module Wgit
496
515
  result
497
516
  end
498
517
 
518
+ private
519
+
499
520
  # Initialises an instance variable and defines a getter method for it.
521
+ #
500
522
  # @param var [Symbol] The name of the variable to be initialized.
501
523
  # @param value [Object] The newly initialized variable's value.
502
524
  # @return [Symbol] The name of the newly created getter method.
503
525
  def init_var(var, value)
504
526
  # instance_var_name starts with @, var_name doesn't.
505
527
  var = var.to_s
506
- var_name = (var.start_with?("@") ? var[1..-1] : var).to_sym
528
+ var_name = (var.start_with?('@') ? var[1..-1] : var).to_sym
507
529
  instance_var_name = "@#{var_name}".to_sym
508
530
 
509
531
  instance_variable_set(instance_var_name, value)
@@ -513,13 +535,19 @@ module Wgit
513
535
  end
514
536
  end
515
537
 
516
- alias :relative_links :internal_links
517
- alias :relative_urls :internal_links
518
- alias :relative_full_links :internal_full_links
519
- alias :relative_full_urls :internal_full_links
520
- alias :internal_absolute_links :internal_full_links
521
- alias :relative_absolute_links :internal_full_links
522
- alias :relative_absolute_urls :internal_full_links
523
- alias :external_urls :external_links
538
+ # Ensure the @url and @html Strings are correctly encoded etc.
539
+ def process_url_and_html
540
+ @url = Wgit::Utils.process_str(@url)
541
+ @html = Wgit::Utils.process_str(@html)
542
+ end
543
+
544
+ alias relative_links internal_links
545
+ alias relative_urls internal_links
546
+ alias relative_full_links internal_full_links
547
+ alias relative_full_urls internal_full_links
548
+ alias internal_absolute_links internal_full_links
549
+ alias relative_absolute_links internal_full_links
550
+ alias relative_absolute_urls internal_full_links
551
+ alias external_urls external_links
524
552
  end
525
553
  end