scraper_utils 0.15.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 917ac18062a2b514b864ec39593a508c27cce14bd7c32fa71f13daed2ff442c1
4
- data.tar.gz: 4f9652b9eab73158f2843730214b7e0b57a7ec854854f7be91080e06d8ec86e3
3
+ metadata.gz: 9a1001f794ef04c587bb726157c66fc637fbb8525bac1c5be93a138e7f0a8266
4
+ data.tar.gz: f92023b5362c6b64ae74d0bf43cf613b02849687a46ec7fbb6b51c4b7ad397dc
5
5
  SHA512:
6
- metadata.gz: 5b99f780772f265aea38cb8c09bf88c1c58a933642a4e42bd0bd424f4a51681fd596a64a84b939bb21f9a681c2b6ce832e0a32f7f4da25fc12ce1bd8fe73d2d5
7
- data.tar.gz: 820d683532470049469a2926f946e58a64fbc7f24978e83593e6b8a28d656c0d544397ef35f8c39c232c4c91fc69f435a28a46cf094a6238b21a9d0b8fa57b33
6
+ metadata.gz: 88e952e952d59011018ca4721bde72d49c913beccccf098d62bb4d1313d0ca3bf94678ff27db5ba4cef3a674fefbebd067a5008e5f36a2029f2a9c8ac1689b15
7
+ data.tar.gz: 35601498d9d110d5d365aa7c1fddcfa74a86fde4b93537b44f8e00bb84f664ba455c642256c0032e221b484d986ea39b2d3ab743c94102b10c7bed1c397139d5
data/CHANGELOG.md CHANGED
@@ -1,5 +1,8 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.16.0 - 2026-04-08
4
+ * Use defaults from AgentConfig for `throttle_block`, and allow defaults to be overridden
5
+
3
6
  ## 0.15.0 - 2026-03-05
4
7
 
5
8
  * Add `validate_info_urls_are_present!` to check info_urls respond with 2xx status using HEAD requests
@@ -27,21 +27,20 @@ class Scraper
27
27
  begin
28
28
  ScraperUtils::DataQualityMonitor.start_authority(authority_label)
29
29
  YourScraper.scrape(authority_label) do |record|
30
- begin
31
- record["authority_label"] = authority_label.to_s
32
- ScraperUtils::DbUtils.save_record(record)
33
- rescue ScraperUtils::UnprocessableRecord => e
34
- # Log bad record but continue processing unless too many have occurred
35
- ScraperUtils::DataQualityMonitor.log_unprocessable_record(e, record)
36
- unprocessable_record_details << [e, record]
37
- end
30
+ record["authority_label"] = authority_label.to_s
31
+ ScraperUtils::DbUtils.save_record(record)
32
+ rescue ScraperUtils::UnprocessableRecord => e
33
+ # Log bad record but continue processing unless too many have occurred
34
+ ScraperUtils::DataQualityMonitor.log_unprocessable_record(e, record)
35
+ unprocessable_record_details << [e, record]
38
36
  end
39
37
  rescue StandardError => e
40
38
  warn "#{authority_label}: ERROR: #{e}"
41
39
  warn e.backtrace
42
40
  fatal_exception = e
43
41
  end
44
- [authority_label, ScraperUtils::DbUtils.collected_saves, unprocessable_record_details, fatal_exception]
42
+ [authority_label, ScraperUtils::DbUtils.collected_saves, unprocessable_record_details,
43
+ fatal_exception]
45
44
  end
46
45
 
47
46
  # Process authorities in parallel
@@ -54,7 +53,7 @@ class Scraper
54
53
  scrape_authority(authority_label, attempt)
55
54
  end.each do |authority_label, saves, unprocessable, fatal_exception|
56
55
  # Runs in main process
57
- status = fatal_exception ? 'FAILED' : 'OK'
56
+ status = fatal_exception ? "FAILED" : "OK"
58
57
  puts "Saving results of #{authority_label}: #{saves.size} records, #{unprocessable.size} unprocessable #{status}"
59
58
 
60
59
  saves.each do |record|
@@ -65,11 +64,11 @@ class Scraper
65
64
  exceptions[authority_label] = e
66
65
  end
67
66
 
68
- if fatal_exception
69
- puts " Warning: #{authority_label} failed with: #{fatal_exception.message}"
70
- puts " Saved #{saves.size} records before failure"
71
- exceptions[authority_label] = fatal_exception
72
- end
67
+ next unless fatal_exception
68
+
69
+ puts " Warning: #{authority_label} failed with: #{fatal_exception.message}"
70
+ puts " Saved #{saves.size} records before failure"
71
+ exceptions[authority_label] = fatal_exception
73
72
  end
74
73
 
75
74
  exceptions
@@ -96,7 +95,7 @@ class Scraper
96
95
  unless exceptions.empty?
97
96
  puts "\n***************************************************"
98
97
  puts "Now retrying authorities which earlier had failures"
99
- puts exceptions.keys.join(", ").to_s
98
+ puts exceptions.keys.join(", ")
100
99
  puts "***************************************************"
101
100
 
102
101
  start_time = Time.now
@@ -118,7 +117,7 @@ end
118
117
  if __FILE__ == $PROGRAM_NAME
119
118
  ENV["MORPH_EXPECT_BAD"] ||= "some,councils"
120
119
 
121
- process_count = (ENV['MORPH_PROCESSES'] || Etc.nprocessors * 2).to_i
120
+ process_count = (ENV["MORPH_PROCESSES"] || (Etc.nprocessors * 2)).to_i
122
121
 
123
122
  Scraper.run(Scraper.selected_authorities, process_count: process_count)
124
123
  end
@@ -22,13 +22,11 @@ class Scraper
22
22
  # REPLACE section with:
23
23
  ScraperUtils::DataQualityMonitor.start_authority(authority_label)
24
24
  YourScraper.scrape(authority_label) do |record|
25
- begin
26
- record["authority_label"] = authority_label.to_s
27
- ScraperUtils::DbUtils.save_record(record)
28
- rescue ScraperUtils::UnprocessableRecord => e
29
- ScraperUtils::DataQualityMonitor.log_unprocessable_record(e, record)
30
- exceptions[authority_label] = e
31
- end
25
+ record["authority_label"] = authority_label.to_s
26
+ ScraperUtils::DbUtils.save_record(record)
27
+ rescue ScraperUtils::UnprocessableRecord => e
28
+ ScraperUtils::DataQualityMonitor.log_unprocessable_record(e, record)
29
+ exceptions[authority_label] = e
32
30
  end
33
31
  # END OF REPLACE
34
32
  rescue StandardError => e
@@ -61,7 +59,7 @@ class Scraper
61
59
  puts "Now retrying authorities which earlier had failures"
62
60
  puts exceptions.keys.join(", ")
63
61
  puts "***************************************************"
64
- ENV['DEBUG'] ||= '1'
62
+ ENV["DEBUG"] ||= "1"
65
63
 
66
64
  start_time = Time.now
67
65
  exceptions = scrape(exceptions.keys, 2)
@@ -85,12 +83,11 @@ if __FILE__ == $PROGRAM_NAME
85
83
  # some: url-for-issue Summary Reason
86
84
  # councils: url-for-issue Summary Reason
87
85
 
88
- if ENV['MORPH_EXPECT_BAD'].nil?
89
- default_expect_bad = {
90
- }
91
- puts 'Default EXPECT_BAD:', default_expect_bad.to_yaml if default_expect_bad.any?
86
+ if ENV["MORPH_EXPECT_BAD"].nil?
87
+ default_expect_bad = {}
88
+ puts "Default EXPECT_BAD:", default_expect_bad.to_yaml if default_expect_bad.any?
92
89
 
93
- ENV["MORPH_EXPECT_BAD"] = default_expect_bad.keys.join(',')
90
+ ENV["MORPH_EXPECT_BAD"] = default_expect_bad.keys.join(",")
94
91
  end
95
92
  # If the sites have many unusable records - raise defaults
96
93
  # ENV['MORPH_UNPROCESSABLE_BASE'] ||= "10"
@@ -26,7 +26,7 @@ if File.exist?(config_file)
26
26
  config = YAML.safe_load(File.read(config_file), symbolize_names: true)
27
27
  options.merge!(config) if config
28
28
  puts "Loaded config from #{config_file}"
29
- rescue => e
29
+ rescue StandardError => e
30
30
  puts "Warning: Could not load #{config_file}: #{e.message}"
31
31
  end
32
32
  end
@@ -38,19 +38,23 @@ OptionParser.new do |opts|
38
38
  options[:database] = db
39
39
  end
40
40
 
41
- opts.on("-g", "--geocodable-percentage N", Integer, "Min percentage of geocodable addresses (default: 50)") do |n|
41
+ opts.on("-g", "--geocodable-percentage N", Integer,
42
+ "Min percentage of geocodable addresses (default: 50)") do |n|
42
43
  options[:geocodable_percentage] = n
43
44
  end
44
45
 
45
- opts.on("-r", "--description-percentage N", Integer, "Min percentage of reasonable descriptions (default: 50)") do |n|
46
+ opts.on("-r", "--description-percentage N", Integer,
47
+ "Min percentage of reasonable descriptions (default: 50)") do |n|
46
48
  options[:description_percentage] = n
47
49
  end
48
50
 
49
- opts.on("-u", "--info-url-percentage N", Integer, "Min percentage for info URL validation (default: 75)") do |n|
51
+ opts.on("-u", "--info-url-percentage N", Integer,
52
+ "Min percentage for info URL validation (default: 75)") do |n|
50
53
  options[:info_url_percentage] = n
51
54
  end
52
55
 
53
- opts.on("-v", "--variation N", Integer, "Variation tolerance for all validations (default: 3)") do |n|
56
+ opts.on("-v", "--variation N", Integer,
57
+ "Variation tolerance for all validations (default: 3)") do |n|
54
58
  options[:geocodable_variation] = n
55
59
  options[:description_variation] = n
56
60
  options[:info_url_variation] = n
@@ -60,11 +64,13 @@ OptionParser.new do |opts|
60
64
  options[:bot_check_expected] = true
61
65
  end
62
66
 
63
- opts.on("-i", "--global-info-url URL", "Validate all records use this global info URL (auto-detected if all URLs are the same)") do |url|
67
+ opts.on("-i", "--global-info-url URL",
68
+ "Validate all records use this global info URL (auto-detected if all URLs are the same)") do |url|
64
69
  options[:global_info_url] = url
65
70
  end
66
71
 
67
- opts.on("-c", "--config FILE", "Load config from YAML file (default: .scraper_validation.yml)") do |file|
72
+ opts.on("-c", "--config FILE",
73
+ "Load config from YAML file (default: .scraper_validation.yml)") do |file|
68
74
  config_file = file
69
75
  end
70
76
 
@@ -142,7 +148,6 @@ begin
142
148
 
143
149
  puts
144
150
  puts "✅ All validations passed!"
145
-
146
151
  rescue RuntimeError => e
147
152
  puts
148
153
  puts "❌ Validation failed: #{e.message}"
@@ -30,8 +30,13 @@ module ScraperUtils
30
30
  # Initial base of 5.01 (override using MORPH_UNPROCESSABLE_BASE)
31
31
  # Initial percentage of 10% (override using MORPH_UNPROCESSABLE_PERCENTAGE)
32
32
  def self.threshold(authority_label)
33
- ENV.fetch('MORPH_UNPROCESSABLE_BASE', 5.01).to_f +
34
- (@stats[authority_label][:saved].to_i * ENV.fetch('MORPH_UNPROCESSABLE_PERCENTAGE', 10.0).to_f / 100.0) if @stats&.fetch(authority_label, nil)
33
+ if @stats&.fetch(
34
+ authority_label, nil
35
+ )
36
+ ENV.fetch("MORPH_UNPROCESSABLE_BASE", 5.01).to_f +
37
+ (@stats[authority_label][:saved].to_i * ENV.fetch("MORPH_UNPROCESSABLE_PERCENTAGE",
38
+ 10.0).to_f / 100.0)
39
+ end
35
40
  end
36
41
 
37
42
  # Logs an unprocessable record and raises an exception if error threshold is exceeded
@@ -44,7 +49,7 @@ module ScraperUtils
44
49
  def self.log_unprocessable_record(exception, record)
45
50
  authority_label = extract_authority(record)
46
51
  @stats[authority_label][:unprocessed] += 1
47
- details = if record&.key?('council_reference') && record&.key?('address')
52
+ details = if record&.key?("council_reference") && record&.key?("address")
48
53
  "#{record['council_reference']} - #{record['address']}"
49
54
  else
50
55
  record.inspect
@@ -64,7 +69,7 @@ module ScraperUtils
64
69
  def self.log_saved_record(record)
65
70
  authority_label = extract_authority(record)
66
71
  @stats[authority_label][:saved] += 1
67
- ScraperUtils::LogUtils.log "Saving record #{authority_label&.empty? ? '' : "for #{authority_label}: "}#{record['council_reference']} - #{record['address']}"
72
+ ScraperUtils::LogUtils.log "Saving record #{"for #{authority_label}: " unless authority_label&.empty?}#{record['council_reference']} - #{record['address']}"
68
73
  end
69
74
  end
70
75
  end
@@ -63,12 +63,16 @@ module ScraperUtils
63
63
  LogUtils.log "Deleting #{deleted_count} applications scraped between #{oldest_date} and #{cutoff_date}"
64
64
  ScraperWiki.sqliteexecute("DELETE FROM data WHERE date_scraped < ?", [cutoff_date])
65
65
 
66
- return unless rand < 0.03 || (oldest_date && oldest_date < vacuum_cutoff_date) || ENV["VACUUM"] || force
66
+ unless rand < 0.03 || (oldest_date && oldest_date < vacuum_cutoff_date) || ENV["VACUUM"] || force
67
+ return
68
+ end
67
69
 
68
70
  LogUtils.log " Running VACUUM to reclaim space..."
69
71
  ScraperWiki.sqliteexecute("VACUUM")
70
72
  rescue SqliteMagic::NoSuchTable => e
71
- ScraperUtils::LogUtils.log "Ignoring: #{e} whilst cleaning old records" if ScraperUtils::DebugUtils.trace?
73
+ if ScraperUtils::DebugUtils.trace?
74
+ ScraperUtils::LogUtils.log "Ignoring: #{e} whilst cleaning old records"
75
+ end
72
76
  end
73
77
  end
74
78
  end
@@ -18,7 +18,7 @@ module ScraperUtils
18
18
  # Checks DEBUG and MORPH_DEBUG env variables
19
19
  # @return [Integer] Debug level
20
20
  def self.debug_level
21
- debug = ENV.fetch(DEBUG_ENV_VAR, ENV.fetch(MORPH_DEBUG_ENV_VAR, '0'))
21
+ debug = ENV.fetch(DEBUG_ENV_VAR, ENV.fetch(MORPH_DEBUG_ENV_VAR, "0"))
22
22
  debug =~ /^\d/ ? debug.to_i : BASIC_LEVEL
23
23
  end
24
24
 
@@ -48,7 +48,6 @@ module ScraperUtils
48
48
  debug?(TRACE_LEVEL)
49
49
  end
50
50
 
51
-
52
51
  # Logs details of an HTTP request when debug mode is enabled
53
52
  #
54
53
  # @param http_method [String] HTTP http_method (GET, POST, etc.)
@@ -21,7 +21,7 @@ module ScraperUtils
21
21
  @crawl_delay = crawl_delay.to_f
22
22
  # Clamp between 10 (delay 9x response) and 100 (no extra delay)
23
23
  @max_load = max_load ? max_load.to_f.clamp(10.0, 100.0) : nil
24
- @next_request_at = {} # hostname => Time
24
+ @next_request_at = {} # hostname => Time
25
25
  @request_started_at = {} # hostname => Time
26
26
  end
27
27
 
@@ -52,23 +52,19 @@ module ScraperUtils
52
52
  response_time = Time.now - started
53
53
 
54
54
  delay = @crawl_delay
55
- if @max_load
56
- delay += (100.0 - @max_load) * response_time / @max_load
57
- end
55
+ delay += (100.0 - @max_load) * response_time / @max_load if @max_load
58
56
 
59
- if overloaded
60
- delay = delay + response_time * 2 + 5.0
61
- end
57
+ delay = delay + (response_time * 2) + 5.0 if overloaded
62
58
 
63
59
  delay = delay.round(3).clamp(0.0, MAX_DELAY)
64
60
  @next_request_at[hostname] = Time.now + delay
65
61
 
66
- if DebugUtils.basic?
67
- msg = "HostThrottler: #{hostname} response=#{response_time.round(3)}s"
68
- msg += " OVERLOADED" if overloaded
69
- msg += ", Will delay #{delay}s before next request"
70
- LogUtils.log(msg)
71
- end
62
+ return unless DebugUtils.basic?
63
+
64
+ msg = "HostThrottler: #{hostname} response=#{response_time.round(3)}s"
65
+ msg += " OVERLOADED" if overloaded
66
+ msg += ", Will delay #{delay}s before next request"
67
+ LogUtils.log(msg)
72
68
  end
73
69
 
74
70
  # Duck-type check for HTTP overload errors across Mechanize, HTTParty, etc.
@@ -14,7 +14,7 @@ module ScraperUtils
14
14
  # @param message [String] the message to log
15
15
  # @return [void]
16
16
  def self.log(message, authority = nil)
17
- authority ||= ENV['AUTHORITY']
17
+ authority ||= ENV.fetch("AUTHORITY", nil)
18
18
  $stderr.flush
19
19
  if authority
20
20
  puts "[#{authority}] #{message}"
@@ -85,7 +85,7 @@ module ScraperUtils
85
85
  failed
86
86
  )
87
87
 
88
- DbUtils::cleanup_old_records
88
+ DbUtils.cleanup_old_records
89
89
  end
90
90
 
91
91
  # Extracts the first relevant line from backtrace that's from our project
@@ -104,15 +104,15 @@ module ScraperUtils
104
104
  format = options[:format] || false
105
105
 
106
106
  # Normalize the root directory path with a trailing slash
107
- pwd = File.join(pwd, '')
107
+ pwd = File.join(pwd, "")
108
108
 
109
109
  backtrace.each do |line|
110
- next if line.include?('/gems/') ||
111
- line.include?('/vendor/') ||
112
- line.include?('/ruby/')
110
+ next if line.include?("/gems/") ||
111
+ line.include?("/vendor/") ||
112
+ line.include?("/ruby/")
113
113
 
114
114
  if line.start_with?(pwd)
115
- relative_path = line.sub(pwd, '')
115
+ relative_path = line.sub(pwd, "")
116
116
  return format ? " [#{relative_path}]" : relative_path
117
117
  end
118
118
  end
@@ -138,7 +138,7 @@ module ScraperUtils
138
138
  puts "\nScraping Summary:"
139
139
  summary_format = "%-20s %6s %6s %s"
140
140
 
141
- puts format(summary_format, 'Authority', 'OK', 'Bad', 'Exception')
141
+ puts format(summary_format, "Authority", "OK", "Bad", "Exception")
142
142
  puts format(summary_format, "-" * 20, "-" * 6, "-" * 6, "-" * 50)
143
143
 
144
144
  authorities.each do |authority|
@@ -149,7 +149,8 @@ module ScraperUtils
149
149
 
150
150
  expect_bad_prefix = expect_bad.include?(authority) ? "[EXPECT BAD] " : ""
151
151
  exception_msg = if exceptions[authority]
152
- location = self.project_backtrace_line(exceptions[authority].backtrace, format: true)
152
+ location = project_backtrace_line(exceptions[authority].backtrace,
153
+ format: true)
153
154
  "#{exceptions[authority].class} - #{exceptions[authority]}#{location}"
154
155
  else
155
156
  "-"
@@ -174,12 +175,12 @@ module ScraperUtils
174
175
 
175
176
  # Check for authorities with unexpected errors
176
177
  unexpected_errors = authorities
177
- .select { |authority| exceptions[authority] }
178
- .reject { |authority| expect_bad.include?(authority) }
178
+ .select { |authority| exceptions[authority] }
179
+ .reject { |authority| expect_bad.include?(authority) }
179
180
 
180
181
  if unexpected_errors.any?
181
182
  errors << "ERROR: Unexpected errors in: #{unexpected_errors.join(',')} " \
182
- "(Add to MORPH_EXPECT_BAD?)"
183
+ "(Add to MORPH_EXPECT_BAD?)"
183
184
  unexpected_errors.each do |authority|
184
185
  error = exceptions[authority]
185
186
  errors << " #{authority}: #{error.class} - #{error}"
@@ -228,7 +229,8 @@ module ScraperUtils
228
229
  # Moved to DbUtils
229
230
  # :nocov:
230
231
  def self.cleanup_old_records(force: false)
231
- warn "`#{self.class}##{__method__}` is deprecated and will be removed in a future release, use `ScraperUtils::DbUtils.cleanup_old_records` instead.", category: :deprecated
232
+ warn "`#{self.class}##{__method__}` is deprecated and will be removed in a future release, use `ScraperUtils::DbUtils.cleanup_old_records` instead.",
233
+ category: :deprecated
232
234
  ScraperUtils::DbUtils.cleanup_old_records(force: force)
233
235
  end
234
236
  # :nocov:
@@ -239,7 +241,9 @@ module ScraperUtils
239
241
 
240
242
  lines = []
241
243
  error.backtrace.each do |line|
242
- lines << line if lines.length < 2 || !(line.include?("/vendor/") || line.include?("/gems/") || line.include?("/ruby/"))
244
+ if lines.length < 2 || !(line.include?("/vendor/") || line.include?("/gems/") || line.include?("/ruby/"))
245
+ lines << line
246
+ end
243
247
  break if lines.length >= 6
244
248
  end
245
249
 
@@ -11,7 +11,8 @@ module ScraperUtils
11
11
  def self.fibonacci_series(max)
12
12
  result = []
13
13
  # Start with the basic Fibonacci sequence
14
- last_fib, this_fib = 1, 0
14
+ last_fib = 1
15
+ this_fib = 0
15
16
  while this_fib <= max
16
17
  result << this_fib
17
18
  yield this_fib if block_given?
@@ -61,12 +61,12 @@ module ScraperUtils
61
61
  # Reset all configuration options to their default values
62
62
  # @return [void]
63
63
  def reset_defaults!
64
- @default_timeout = ENV.fetch('MORPH_CLIENT_TIMEOUT', DEFAULT_TIMEOUT).to_i # 60
65
- @default_disable_ssl_certificate_check = !ENV.fetch('MORPH_DISABLE_SSL_CHECK', nil).to_s.empty? # false
66
- @default_australian_proxy = !ENV.fetch('MORPH_USE_PROXY', nil).to_s.empty? # false
67
- @default_user_agent = ENV.fetch('MORPH_USER_AGENT', nil) # Uses Mechanize user agent
68
- @default_crawl_delay = ENV.fetch('MORPH_CLIENT_CRAWL_DELAY', DEFAULT_CRAWL_DELAY)
69
- @default_max_load = ENV.fetch('MORPH_MAX_LOAD', DEFAULT_MAX_LOAD)
64
+ @default_timeout = ENV.fetch("MORPH_CLIENT_TIMEOUT", DEFAULT_TIMEOUT).to_i # 60
65
+ @default_disable_ssl_certificate_check = !ENV.fetch("MORPH_DISABLE_SSL_CHECK", nil).to_s.empty? # false
66
+ @default_australian_proxy = !ENV.fetch("MORPH_USE_PROXY", nil).to_s.empty? # false
67
+ @default_user_agent = ENV.fetch("MORPH_USER_AGENT", nil) # Uses Mechanize user agent
68
+ @default_crawl_delay = ENV.fetch("MORPH_CLIENT_CRAWL_DELAY", DEFAULT_CRAWL_DELAY)
69
+ @default_max_load = ENV.fetch("MORPH_MAX_LOAD", DEFAULT_MAX_LOAD)
70
70
  end
71
71
  end
72
72
 
@@ -113,10 +113,10 @@ module ScraperUtils
113
113
  @australian_proxy &&= !ScraperUtils.australian_proxy.to_s.empty?
114
114
  if @australian_proxy
115
115
  uri = begin
116
- URI.parse(ScraperUtils.australian_proxy.to_s)
117
- rescue URI::InvalidURIError => e
118
- raise URI::InvalidURIError, "Invalid proxy URL format: #{e}"
119
- end
116
+ URI.parse(ScraperUtils.australian_proxy.to_s)
117
+ rescue URI::InvalidURIError => e
118
+ raise URI::InvalidURIError, "Invalid proxy URL format: #{e}"
119
+ end
120
120
  unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
121
121
  raise URI::InvalidURIError, "Proxy URL must start with http:// or https://"
122
122
  end
@@ -177,13 +177,13 @@ module ScraperUtils
177
177
  end
178
178
 
179
179
  def pre_connect_hook(_agent, request)
180
- hostname = (request.respond_to?(:[]) && request['Host']) || 'unknown'
180
+ hostname = (request.respond_to?(:[]) && request["Host"]) || "unknown"
181
181
  @throttler.before_request(hostname)
182
- if DebugUtils.verbose?
183
- ScraperUtils::LogUtils.log(
184
- "Pre Connect request: #{request.inspect}"
185
- )
186
- end
182
+ return unless DebugUtils.verbose?
183
+
184
+ ScraperUtils::LogUtils.log(
185
+ "Pre Connect request: #{request.inspect}"
186
+ )
187
187
  end
188
188
 
189
189
  def post_connect_hook(_agent, uri, response, _body)
@@ -191,7 +191,7 @@ module ScraperUtils
191
191
 
192
192
  status = response.respond_to?(:code) ? response.code.to_i : nil
193
193
  overloaded = [429, 500, 503].include?(status)
194
- hostname = uri.host || 'unknown'
194
+ hostname = uri.host || "unknown"
195
195
  @throttler.after_request(hostname, overloaded: overloaded)
196
196
 
197
197
  if DebugUtils.basic?
@@ -205,9 +205,7 @@ module ScraperUtils
205
205
  def error_hook(_agent, error)
206
206
  # Best-effort: record the error against whatever host we can find
207
207
  # Mechanize errors often carry the URI in the message; fall back to 'unknown'
208
- hostname = if error.respond_to?(:uri)
209
- error.uri.host
210
- end || 'unknown'
208
+ hostname = (error.uri.host if error.respond_to?(:uri)) || "unknown"
211
209
  @throttler.after_request(hostname, overloaded: HostThrottler.overload_error?(error))
212
210
  end
213
211
 
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "host_throttler"
4
+ require_relative "mechanize_utils/agent_config"
4
5
 
5
6
  module ScraperUtils
6
7
  # Misc Standalone Utilities
@@ -8,6 +9,14 @@ module ScraperUtils
8
9
  THROTTLE_HOSTNAME = "block"
9
10
 
10
11
  class << self
12
+ attr_accessor :default_crawl_delay, :default_max_load
13
+
14
+ def reset_defaults!
15
+ @default_crawl_delay = MechanizeUtils::AgentConfig.default_crawl_delay
16
+ @default_max_load = MechanizeUtils::AgentConfig.default_max_load
17
+ reset_throttler!
18
+ end
19
+
11
20
  # Throttle block to be nice to servers we are scraping.
12
21
  # Time spent inside the block (parsing, saving) counts toward the delay.
13
22
  def throttle_block
@@ -34,8 +43,15 @@ module ScraperUtils
34
43
  private
35
44
 
36
45
  def throttler
37
- @throttler ||= HostThrottler.new
46
+ @throttler ||= HostThrottler.new(
47
+ crawl_delay: default_crawl_delay,
48
+ max_load: default_max_load
49
+ )
38
50
  end
39
51
  end
52
+
53
+ # Initialise defaults after AgentConfig is loaded
54
+ require_relative "mechanize_utils/agent_config"
55
+ reset_defaults!
40
56
  end
41
57
  end
@@ -31,8 +31,6 @@ module ScraperUtils
31
31
  errors.empty? ? nil : errors
32
32
  end
33
33
 
34
- private
35
-
36
34
  def self.validate_presence(record, errors)
37
35
  REQUIRED_FIELDS.each do |field|
38
36
  errors << "#{field} can't be blank" if record[field].to_s.strip.empty?
@@ -47,10 +45,10 @@ module ScraperUtils
47
45
  begin
48
46
  uri = URI.parse(url)
49
47
  unless uri.is_a?(URI::HTTP) && uri.host.to_s != ""
50
- errors << "info_url must be a valid http\/https URL with host"
48
+ errors << "info_url must be a valid http/https URL with host"
51
49
  end
52
50
  rescue URI::InvalidURIError
53
- errors << "info_url must be a valid http\/https URL"
51
+ errors << "info_url must be a valid http/https URL"
54
52
  end
55
53
  end
56
54
 
@@ -58,18 +56,22 @@ module ScraperUtils
58
56
  today = Date.today
59
57
 
60
58
  date_scraped = parse_date(record["date_scraped"])
61
- errors << "Invalid date format for date_scraped: #{record["date_scraped"].inspect} is not a valid ISO 8601 date" if record["date_scraped"] && date_scraped.nil?
59
+ if record["date_scraped"] && date_scraped.nil?
60
+ errors << "Invalid date format for date_scraped: #{record['date_scraped'].inspect} is not a valid ISO 8601 date"
61
+ end
62
62
 
63
63
  date_received = parse_date(record["date_received"])
64
64
  if record["date_received"] && date_received.nil?
65
- errors << "Invalid date format for date_received: #{record["date_received"].inspect} is not a valid ISO 8601 date"
65
+ errors << "Invalid date format for date_received: #{record['date_received'].inspect} is not a valid ISO 8601 date"
66
66
  elsif date_received && date_received.to_date > today
67
- errors << "Invalid date for date_received: #{record["date_received"].inspect} is in the future"
67
+ errors << "Invalid date for date_received: #{record['date_received'].inspect} is in the future"
68
68
  end
69
69
 
70
70
  %w[on_notice_from on_notice_to].each do |field|
71
71
  val = parse_date(record[field])
72
- errors << "Invalid date format for #{field}: #{record[field].inspect} is not a valid ISO 8601 date" if record[field] && val.nil?
72
+ if record[field] && val.nil?
73
+ errors << "Invalid date format for #{field}: #{record[field].inspect} is not a valid ISO 8601 date"
74
+ end
73
75
  end
74
76
  end
75
77
 
@@ -47,41 +47,43 @@ module ScraperUtils
47
47
 
48
48
  PLANNING_KEYWORDS = [
49
49
  # Building types
50
- 'dwelling', 'house', 'unit', 'building', 'structure', 'facility',
50
+ "dwelling", "house", "unit", "building", "structure", "facility",
51
51
  # Modifications
52
- 'addition', 'extension', 'renovation', 'alteration', 'modification',
53
- 'replacement', 'upgrade', 'improvement',
52
+ "addition", "extension", "renovation", "alteration", "modification",
53
+ "replacement", "upgrade", "improvement",
54
54
  # Specific structures
55
- 'carport', 'garage', 'shed', 'pool', 'deck', 'patio', 'pergola',
56
- 'verandah', 'balcony', 'fence', 'wall', 'driveway',
55
+ "carport", "garage", "shed", "pool", "deck", "patio", "pergola",
56
+ "verandah", "balcony", "fence", "wall", "driveway",
57
57
  # Development types
58
- 'subdivision', 'demolition', 'construction', 'development',
58
+ "subdivision", "demolition", "construction", "development",
59
59
  # Services/utilities
60
- 'signage', 'telecommunications', 'stormwater', 'water', 'sewer',
60
+ "signage", "telecommunications", "stormwater", "water", "sewer",
61
61
  # Approvals/certificates
62
- 'certificate', 'approval', 'consent', 'permit'
62
+ "certificate", "approval", "consent", "permit"
63
63
  ].freeze
64
64
 
65
-
66
65
  def self.fetch_url_head(url)
67
66
  agent = Mechanize.new
68
- # FIXME - Allow injection of a check to agree to terms if needed to set a cookie and reget the url
67
+ # FIXME: - Allow injection of a check to agree to terms if needed to set a cookie and reget the url
69
68
  agent.head(url)
70
69
  end
71
70
 
72
71
  def self.fetch_url_with_redirects(url)
73
72
  agent = Mechanize.new
74
- # FIXME - Allow injection of a check to agree to terms if needed to set a cookie and reget the url
73
+ # FIXME: - Allow injection of a check to agree to terms if needed to set a cookie and reget the url
75
74
  agent.get(url)
76
75
  end
77
76
 
78
- def self.authority_label(results, prefix: '', suffix: '')
77
+ def self.authority_label(results, prefix: "", suffix: "")
79
78
  return nil if results.nil?
80
79
 
81
- authority_labels = results.map { |record| record['authority_label'] }.compact.uniq
80
+ authority_labels = results.map { |record| record["authority_label"] }.compact.uniq
82
81
  return nil if authority_labels.empty?
83
82
 
84
- raise "Expected one authority_label, not #{authority_labels.inspect}" if authority_labels.size > 1
83
+ if authority_labels.size > 1
84
+ raise "Expected one authority_label, not #{authority_labels.inspect}"
85
+ end
86
+
85
87
  "#{prefix}#{authority_labels.first}#{suffix}"
86
88
  end
87
89
 
@@ -95,7 +97,8 @@ module ScraperUtils
95
97
  duplicates = groups.select { |_k, g| g.size > 1 }
96
98
  return if duplicates.empty?
97
99
 
98
- raise UnprocessableSite, "Duplicate authority labels: #{duplicates.keys.map(&:inspect).join(', ')}"
100
+ raise UnprocessableSite,
101
+ "Duplicate authority labels: #{duplicates.keys.map(&:inspect).join(', ')}"
99
102
  end
100
103
 
101
104
  # Validates enough addresses are geocodable
@@ -105,28 +108,32 @@ module ScraperUtils
105
108
  # @param ignore_case [Boolean] Ignores case which relaxes suburb check
106
109
  # @param known_suburbs [Array<String>] Known suburbs to detect in address when there is no postcode and no uppercase suburb
107
110
  # @raise RuntimeError if insufficient addresses are geocodable
108
- def self.validate_addresses_are_geocodable!(results, percentage: 50, variation: 3, ignore_case: false, known_suburbs: [])
111
+ def self.validate_addresses_are_geocodable!(results, percentage: 50, variation: 3,
112
+ ignore_case: false, known_suburbs: [])
109
113
  return nil if results.empty?
110
114
 
111
115
  geocodable = results
112
- .map { |record| record["address"] }
113
- .uniq
114
- .count do |text|
115
- ok = ScraperUtils::SpecSupport.geocodable? text, known_suburbs: known_suburbs, ignore_case: ignore_case
116
- if !ok && DebugUtils.verbose?
117
- ScraperUtils::LogUtils.log(
118
- "Address: #{text.inspect} is not geocodeable with #{known_suburbs&.size} know suburbs, ignore_case: #{ignore_case.inspect}"
119
- )
120
- end
121
-
122
- ok
123
- end
116
+ .map { |record| record["address"] }
117
+ .uniq
118
+ .count do |text|
119
+ ok = ScraperUtils::SpecSupport.geocodable? text,
120
+ known_suburbs: known_suburbs, ignore_case: ignore_case
121
+ if !ok && DebugUtils.verbose?
122
+ ScraperUtils::LogUtils.log(
123
+ "Address: #{text.inspect} is not geocodeable with #{known_suburbs&.size} know suburbs, ignore_case: #{ignore_case.inspect}"
124
+ )
125
+ end
126
+
127
+ ok
128
+ end
124
129
  puts "Found #{geocodable} out of #{results.count} unique geocodable addresses " \
125
- "(#{(100.0 * geocodable / results.count).round(1)}%)"
126
- expected = [((percentage.to_f / 100.0) * results.count - variation), 1].max
130
+ "(#{(100.0 * geocodable / results.count).round(1)}%)"
131
+ expected = [(((percentage.to_f / 100.0) * results.count) - variation), 1].max
127
132
  unless geocodable >= expected
128
- raise UnprocessableSite, "Expected at least #{expected} (#{percentage}% - #{variation}) geocodable addresses, got #{geocodable}"
133
+ raise UnprocessableSite,
134
+ "Expected at least #{expected} (#{percentage}% - #{variation}) geocodable addresses, got #{geocodable}"
129
135
  end
136
+
130
137
  geocodable
131
138
  end
132
139
 
@@ -138,10 +145,13 @@ module ScraperUtils
138
145
  # @return [Boolean] True if the address appears to be geocodable.
139
146
  def self.geocodable?(address, ignore_case: false, known_suburbs: [])
140
147
  return false if address.nil? || address.empty?
148
+
141
149
  check_address = ignore_case ? address.upcase : address
142
150
 
143
151
  # Basic structure check - must have a street type or unit/lot, uppercase suburb or postcode, state
144
- has_state = AUSTRALIAN_STATES.any? { |state| check_address.end_with?(" #{state}") || check_address.include?(" #{state} ") }
152
+ has_state = AUSTRALIAN_STATES.any? do |state|
153
+ check_address.end_with?(" #{state}") || check_address.include?(" #{state} ")
154
+ end
145
155
  has_postcode = address.match?(AUSTRALIAN_POSTCODES)
146
156
 
147
157
  # Using the pre-compiled patterns
@@ -154,9 +164,13 @@ module ScraperUtils
154
164
  if ENV["DEBUG"]
155
165
  missing = []
156
166
  missing << "street type" unless has_street_type
157
- missing << "postcode/Uppercase suburb/Known suburb" unless has_postcode || has_uppercase_suburb || has_known_suburb
167
+ unless has_postcode || has_uppercase_suburb || has_known_suburb
168
+ missing << "postcode/Uppercase suburb/Known suburb"
169
+ end
158
170
  missing << "state" unless has_state
159
- puts " address: #{address} is not geocodable, missing #{missing.join(', ')}" if missing.any?
171
+ if missing.any?
172
+ puts " address: #{address} is not geocodable, missing #{missing.join(', ')}"
173
+ end
160
174
  end
161
175
 
162
176
  has_street_type && (has_postcode || has_uppercase_suburb || has_known_suburb) && has_state
@@ -183,17 +197,21 @@ module ScraperUtils
183
197
  return nil if results.empty?
184
198
 
185
199
  descriptions = results
186
- .map { |record| record["description"] }
187
- .uniq
188
- .count do |text|
200
+ .map { |record| record["description"] }
201
+ .uniq
202
+ .count do |text|
189
203
  selected = ScraperUtils::SpecSupport.reasonable_description? text
190
204
  puts " description: #{text} is not reasonable" if ENV["DEBUG"] && !selected
191
205
  selected
192
206
  end
193
207
  puts "Found #{descriptions} out of #{results.count} unique reasonable descriptions " \
194
- "(#{(100.0 * descriptions / results.count).round(1)}%)"
195
- expected = [(percentage.to_f / 100.0) * results.count - variation, 1].max
196
- raise UnprocessableSite, "Expected at least #{expected} (#{percentage}% - #{variation}) reasonable descriptions, got #{descriptions}" unless descriptions >= expected
208
+ "(#{(100.0 * descriptions / results.count).round(1)}%)"
209
+ expected = [((percentage.to_f / 100.0) * results.count) - variation, 1].max
210
+ unless descriptions >= expected
211
+ raise UnprocessableSite,
212
+ "Expected at least #{expected} (#{percentage}% - #{variation}) reasonable descriptions, got #{descriptions}"
213
+ end
214
+
197
215
  descriptions
198
216
  end
199
217
 
@@ -216,7 +234,8 @@ module ScraperUtils
216
234
  # @param bot_check_expected [Boolean] Whether bot protection is acceptable
217
235
  # @yield [String] Optional block to customize URL fetching (e.g., handle terms agreement)
218
236
  # @raise RuntimeError if records don't use the expected URL or it doesn't return 200
219
- def self.validate_uses_one_valid_info_url!(results, expected_url, bot_check_expected: false, &block)
237
+ def self.validate_uses_one_valid_info_url!(results, expected_url, bot_check_expected: false,
238
+ &block)
220
239
  info_urls = results.map { |record| record["info_url"] }.uniq
221
240
 
222
241
  unless info_urls.size == 1
@@ -262,7 +281,8 @@ module ScraperUtils
262
281
  # @param bot_check_expected [Boolean] Whether bot protection is acceptable
263
282
  # @yield [String] Optional block to customize URL fetching (e.g., handle terms agreement)
264
283
  # @raise RuntimeError if insufficient detail checks pass
265
- def self.validate_info_urls_have_expected_details!(results, percentage: 75, variation: 3, bot_check_expected: false, &block)
284
+ def self.validate_info_urls_have_expected_details!(results, percentage: 75, variation: 3,
285
+ bot_check_expected: false, &block)
266
286
  if defined?(VCR)
267
287
  VCR.use_cassette("#{authority_label(results, suffix: '_')}info_urls") do
268
288
  check_info_url_details(results, percentage, variation, bot_check_expected, &block)
@@ -284,15 +304,15 @@ module ScraperUtils
284
304
 
285
305
  # Check for common bot protection indicators
286
306
  bot_indicators = [
287
- 'recaptcha',
288
- 'cloudflare',
289
- 'are you human',
290
- 'bot detection',
291
- 'security check',
292
- 'verify you are human',
293
- 'access denied',
294
- 'blocked',
295
- 'captcha'
307
+ "recaptcha",
308
+ "cloudflare",
309
+ "are you human",
310
+ "bot detection",
311
+ "security check",
312
+ "verify you are human",
313
+ "access denied",
314
+ "blocked",
315
+ "captcha"
296
316
  ]
297
317
 
298
318
  bot_indicators.any? { |indicator| body_lower.include?(indicator) }
@@ -308,10 +328,10 @@ module ScraperUtils
308
328
  return
309
329
  end
310
330
 
311
- raise "Expected 200 response from the one expected info_url, got #{page.code}" unless page.code == "200"
312
- end
331
+ return if page.code == "200"
313
332
 
314
- private
333
+ raise "Expected 200 response from the one expected info_url, got #{page.code}"
334
+ end
315
335
 
316
336
  def self.check_info_url_is_present(results, percentage, variation, &block)
317
337
  count = 0
@@ -337,17 +357,21 @@ module ScraperUtils
337
357
 
338
358
  count += 1
339
359
  if status.between?(200, 299)
340
- puts " OK: #{status}" if ENV['DEBUG']
360
+ puts " OK: #{status}" if ENV["DEBUG"]
341
361
  else
342
362
  failed += 1
343
363
  puts " Failed: #{status}"
344
- min_required = ((percentage.to_f / 100.0) * count - variation).round(0)
364
+ min_required = (((percentage.to_f / 100.0) * count) - variation).round(0)
345
365
  passed = count - failed
346
- raise "Too many failures: #{passed}/#{count} passed (min required: #{min_required})" if passed < min_required
366
+ if passed < min_required
367
+ raise "Too many failures: #{passed}/#{count} passed (min required: #{min_required})"
368
+ end
347
369
  end
348
370
  end
349
371
 
350
- puts "#{(100.0 * (count - failed) / count).round(1)}% info_url checks passed (#{failed}/#{count} failed)!" if count > 0
372
+ return unless count > 0
373
+
374
+ puts "#{(100.0 * (count - failed) / count).round(1)}% info_url checks passed (#{failed}/#{count} failed)!"
351
375
  end
352
376
 
353
377
  def self.check_info_url_details(results, percentage, variation, bot_check_expected, &block)
@@ -367,7 +391,10 @@ module ScraperUtils
367
391
  next
368
392
  end
369
393
 
370
- raise UnprocessableRecord, "Expected 200 response, got #{page.code}" unless page.code == "200"
394
+ unless page.code == "200"
395
+ raise UnprocessableRecord,
396
+ "Expected 200 response, got #{page.code}"
397
+ end
371
398
 
372
399
  page_body = page.body.dup.force_encoding("UTF-8").gsub(/\s\s+/, " ")
373
400
 
@@ -375,34 +402,40 @@ module ScraperUtils
375
402
  count += 1
376
403
  expected = CGI.escapeHTML(record[attribute]).gsub(/\s\s+/, " ")
377
404
  expected2 = case attribute
378
- when 'council_reference'
379
- expected.sub(/\ADA\s*-\s*/, '')
380
- when 'address'
381
- expected.sub(/(\S+)\s+(\S+)\z/, '\2 \1').sub(/,\s*\z/, '') # Handle Lismore post-code/state swap
405
+ when "council_reference"
406
+ expected.sub(/\ADA\s*-\s*/, "")
407
+ when "address"
408
+ expected.sub(/(\S+)\s+(\S+)\z/, '\2 \1').sub(/,\s*\z/, "") # Handle Lismore post-code/state swap
382
409
  else
383
410
  expected
384
411
  end
385
412
  expected3 = case attribute
386
- when 'address'
387
- expected.sub(/\s*,?\s+(VIC|NSW|QLD|SA|TAS|WA|ACT|NT)\z/, '')
413
+ when "address"
414
+ expected.sub(/\s*,?\s+(VIC|NSW|QLD|SA|TAS|WA|ACT|NT)\z/, "")
388
415
  else
389
416
  expected
390
- end.gsub(/\s*,\s*/, ' ').gsub(/\s*-\s*/, '-')
391
- next if page_body.include?(expected) || page_body.include?(expected2) || page_body.gsub(/\s*,\s*/, ' ').gsub(/\s*-\s*/, '-').include?(expected3)
417
+ end.gsub(/\s*,\s*/, " ").gsub(/\s*-\s*/, "-")
418
+ next if page_body.include?(expected) || page_body.include?(expected2) || page_body.gsub(/\s*,\s*/, " ").gsub(
419
+ /\s*-\s*/, "-"
420
+ ).include?(expected3)
392
421
 
393
422
  failed += 1
394
- desc2 = expected2 == expected ? '' : " or #{expected2.inspect}"
395
- desc3 = expected3 == expected ? '' : " or #{expected3.inspect}"
423
+ desc2 = expected2 == expected ? "" : " or #{expected2.inspect}"
424
+ desc3 = expected3 == expected ? "" : " or #{expected3.inspect}"
396
425
  puts " Missing: #{expected.inspect}#{desc2}#{desc3}"
397
- puts " IN: #{page_body}" if ENV['DEBUG']
426
+ puts " IN: #{page_body}" if ENV["DEBUG"]
398
427
 
399
- min_required = ((percentage.to_f / 100.0) * count - variation).round(0)
428
+ min_required = (((percentage.to_f / 100.0) * count) - variation).round(0)
400
429
  passed = count - failed
401
- raise "Too many failures: #{passed}/#{count} passed (min required: #{min_required})" if passed < min_required
430
+ if passed < min_required
431
+ raise "Too many failures: #{passed}/#{count} passed (min required: #{min_required})"
432
+ end
402
433
  end
403
434
  end
404
435
 
405
- puts "#{(100.0 * (count - failed) / count).round(1)}% detail checks passed (#{failed}/#{count} failed)!" if count > 0
436
+ return unless count > 0
437
+
438
+ puts "#{(100.0 * (count - failed) / count).round(1)}% detail checks passed (#{failed}/#{count} failed)!"
406
439
  end
407
440
  end
408
441
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ScraperUtils
4
- VERSION = "0.15.0"
4
+ VERSION = "0.16.0"
5
5
  end
@@ -13,7 +13,7 @@ Gem::Specification.new do |spec|
13
13
 
14
14
  spec.summary = "planningalerts scraper utilities"
15
15
  spec.description = "Utilities to help make planningalerts scrapers, " \
16
- "especially multi authority scrapers, easier to develop, run and debug."
16
+ "especially multi authority scrapers, easier to develop, run and debug."
17
17
  spec.homepage = "https://github.com/ianheggie-oaf/#{spec.name}"
18
18
  spec.license = "MIT"
19
19
 
@@ -23,10 +23,10 @@ Gem::Specification.new do |spec|
23
23
  spec.metadata["homepage_uri"] = spec.homepage
24
24
  spec.metadata["source_code_uri"] = spec.homepage
25
25
  spec.metadata["documentation_uri"] = "https://rubydoc.info/gems/#{spec.name}/#{ScraperUtils::VERSION}"
26
- spec.metadata["changelog_uri"] = "#{spec.metadata["source_code_uri"]}/blob/main/CHANGELOG.md"
26
+ spec.metadata["changelog_uri"] = "#{spec.metadata['source_code_uri']}/blob/main/CHANGELOG.md"
27
27
  else
28
28
  raise "RubyGems 2.0 or newer is required to protect against " \
29
- "public gem pushes."
29
+ "public gem pushes."
30
30
  end
31
31
 
32
32
  # Specify which files should be added to the gem when it is released.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scraper_utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.15.0
4
+ version: 0.16.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ian Heggie
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-03-04 00:00:00.000000000 Z
11
+ date: 2026-04-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize
@@ -114,7 +114,7 @@ metadata:
114
114
  allowed_push_host: https://rubygems.org
115
115
  homepage_uri: https://github.com/ianheggie-oaf/scraper_utils
116
116
  source_code_uri: https://github.com/ianheggie-oaf/scraper_utils
117
- documentation_uri: https://rubydoc.info/gems/scraper_utils/0.15.0
117
+ documentation_uri: https://rubydoc.info/gems/scraper_utils/0.16.0
118
118
  changelog_uri: https://github.com/ianheggie-oaf/scraper_utils/blob/main/CHANGELOG.md
119
119
  rubygems_mfa_required: 'true'
120
120
  post_install_message: