scraper_utils 0.2.0 → 0.4.0

@@ -13,8 +13,8 @@ module ScraperUtils
  # @param start_time [Time] When this scraping attempt was started
  # @param attempt [Integer] 1 for first run, 2 for first retry, 3 for last retry (without proxy)
  # @param authorities [Array<Symbol>] List of authorities attempted to scrape
- # @param exceptions [Hash > Exception] Any exception that occurred during scraping
- # DataQualityMonitor.stats is checked for :saved and :unprocessed entries
+ # @param exceptions [Hash{Symbol => Exception}] Any exceptions that occurred during scraping
+ # `DataQualityMonitor.stats` is checked for :saved and :unprocessed entries
  # @return [void]
  def self.log_scraping_run(start_time, attempt, authorities, exceptions)
  raise ArgumentError, "Invalid start time" unless start_time.is_a?(Time)
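
For orientation, a caller might invoke this logging entry point roughly as follows. This is a sketch only: the authority names and exception wiring are illustrative, and the assumption that the method lives in `ScraperUtils::LogUtils` is inferred from the require list rather than stated in this hunk.

```ruby
require "scraper_utils"

start_time = Time.now
authorities = %i[example_authority other_authority] # illustrative names
exceptions = {} # Symbol => Exception, filled in as scraping fails per authority

# attempt: 1 = first run, 2 = first retry, 3 = last retry (without proxy)
ScraperUtils::LogUtils.log_scraping_run(start_time, 1, authorities, exceptions)
```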
@@ -75,39 +75,39 @@ module ScraperUtils

  # Report on the results
  # @param authorities [Array<Symbol>] List of authorities attempted to scrape
- # @param exceptions [Hash > Exception] Any exception that occurred during scraping
- # DataQualityMonitor.stats is checked for :saved and :unprocessed entries
+ # @param exceptions [Hash{Symbol => Exception}] Any exceptions that occurred during scraping
+ # `DataQualityMonitor.stats` is checked for :saved and :unprocessed entries
  # @return [void]
  def self.report_on_results(authorities, exceptions)
- expect_bad = ENV["MORPH_EXPECT_BAD"]&.split(",")&.map(&:strip)&.map(&:to_sym) || []
+ if ENV["MORPH_EXPECT_BAD"]
+ expect_bad = ENV["MORPH_EXPECT_BAD"].split(",").map(&:strip).map(&:to_sym)
+ end
+ expect_bad ||= []

- puts "MORPH_EXPECT_BAD=#{ENV.fetch('MORPH_EXPECT_BAD', nil)}" if expect_bad.any?
+ $stderr.flush
+ puts "MORPH_EXPECT_BAD=#{ENV.fetch('MORPH_EXPECT_BAD', nil)}"

  # Print summary table
  puts "\nScraping Summary:"
  summary_format = "%-20s %6s %6s %s"

- puts summary_format % %w[Authority OK Bad Exception]
- puts summary_format % ['-' * 20, '-' * 6, '-' * 6, '-' * 50]
+ puts format(summary_format, 'Authority', 'OK', 'Bad', 'Exception')
+ puts format(summary_format, "-" * 20, "-" * 6, "-" * 6, "-" * 50)

  authorities.each do |authority|
  stats = ScraperUtils::DataQualityMonitor.stats&.fetch(authority, {}) || {}
-
+
  ok_records = stats[:saved] || 0
  bad_records = stats[:unprocessed] || 0
-
+
  expect_bad_prefix = expect_bad.include?(authority) ? "[EXPECT BAD] " : ""
  exception_msg = if exceptions[authority]
  "#{exceptions[authority].class} - #{exceptions[authority].message}"
  else
  "-"
  end
- puts summary_format % [
- authority.to_s,
- ok_records,
- bad_records,
- "#{expect_bad_prefix}#{exception_msg}".slice(0, 70)
- ]
+ puts format(summary_format, authority.to_s, ok_records, bad_records,
+ "#{expect_bad_prefix}#{exception_msg}".slice(0, 70))
  end
  puts

@@ -120,7 +120,8 @@ module ScraperUtils
  end

  if unexpected_working.any?
- errors << "WARNING: Remove #{unexpected_working.join(',')} from MORPH_EXPECT_BAD as it now works!"
+ errors <<
+ "WARNING: Remove #{unexpected_working.join(',')} from MORPH_EXPECT_BAD as it now works!"
  end

  # Check for authorities with unexpected errors
@@ -137,6 +138,7 @@ module ScraperUtils
  end
  end

+ $stdout.flush
  if errors.any?
  errors << "See earlier output for details"
  raise errors.join("\n")
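
The reworked `report_on_results` reads `MORPH_EXPECT_BAD` as a comma-separated list of authorities that are allowed to fail, prints a summary table, and raises when the actual results disagree with those expectations. A hedged usage sketch follows, again assuming the method sits in `ScraperUtils::LogUtils`; the authority names and the exception are made up.

```ruby
ENV["MORPH_EXPECT_BAD"] = "broken_authority" # authorities expected to fail

authorities = %i[good_authority broken_authority]
exceptions = { broken_authority: RuntimeError.new("server returned 500") }

# Prints the summary table; raises (listing the problems) when an authority fails
# unexpectedly, or when a MORPH_EXPECT_BAD entry starts working again.
ScraperUtils::LogUtils.report_on_results(authorities, exceptions)
```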
@@ -23,6 +23,11 @@ module ScraperUtils
  # random_delay: 10
  # )
  class AgentConfig
+ DEFAULT_TIMEOUT = 60
+ DEFAULT_RANDOM_DELAY = 5
+ DEFAULT_MAX_LOAD = 33.3
+ MAX_LOAD_CAP = 50.0
+
  # Class-level defaults that can be modified
  class << self
  # @return [Integer] Default timeout in seconds for agent connections
@@ -62,65 +67,68 @@ module ScraperUtils
  # Reset all configuration options to their default values
  # @return [void]
  def reset_defaults!
- @default_timeout = 60
- @default_compliant_mode = true
- @default_random_delay = 3
- @default_max_load = 20.0
- @default_disable_ssl_certificate_check = false
- @default_australian_proxy = nil
- @default_user_agent = nil
+ @default_timeout = ENV.fetch('MORPH_TIMEOUT', DEFAULT_TIMEOUT).to_i # 60
+ @default_compliant_mode = ENV.fetch('MORPH_NOT_COMPLIANT', nil).to_s.empty? # true
+ @default_random_delay = ENV.fetch('MORPH_RANDOM_DELAY', DEFAULT_RANDOM_DELAY).to_i # 5
+ @default_max_load = ENV.fetch('MORPH_MAX_LOAD', DEFAULT_MAX_LOAD).to_f # 33.3
+ @default_disable_ssl_certificate_check = !ENV.fetch('MORPH_DISABLE_SSL_CHECK', nil).to_s.empty? # false
+ @default_australian_proxy = !ENV.fetch('MORPH_USE_PROXY', nil).to_s.empty? # false
+ @default_user_agent = ENV.fetch('MORPH_USER_AGENT', nil) # Uses Mechanize user agent
  end
  end

  # Set defaults on load
  reset_defaults!

-
  # @return [String] User agent string
  attr_reader :user_agent

  # Give access for testing

- attr_reader :max_load
- attr_reader :min_random
- attr_reader :max_random
+ attr_reader :max_load, :min_random, :max_random

- # Creates configuration for a Mechanize agent with sensible defaults
- # @param timeout [Integer, nil] Timeout for agent connections (default: 60 unless changed)
- # @param compliant_mode [Boolean, nil] Comply with headers and robots.txt (default: true unless changed)
- # @param random_delay [Integer, nil] Average random delay in seconds (default: 3 unless changed)
- # @param max_load [Float, nil] Maximum server load percentage (nil = no response delay, default: 20%)
+ # Creates Mechanize agent configuration with sensible defaults overridable via configure
+ # @param timeout [Integer, nil] Timeout for agent connections (default: 60)
+ # @param compliant_mode [Boolean, nil] Comply with headers and robots.txt (default: true)
+ # @param random_delay [Integer, nil] Average random delay in seconds (default: 3)
+ # @param max_load [Float, nil] Maximum server load percentage (nil = no delay, default: 20%)
  # When compliant_mode is true, max_load is capped at 33%
- # @param disable_ssl_certificate_check [Boolean, nil] Skip SSL verification (default: false unless changed)
- # @param australian_proxy [Boolean, nil] Use proxy if available (default: false unless changed)
+ # @param disable_ssl_certificate_check [Boolean, nil] Skip SSL verification (default: false)
+ # @param australian_proxy [Boolean, nil] Use proxy if available (default: false)
  # @param user_agent [String, nil] Configure Mechanize user agent
  def initialize(timeout: nil,
  compliant_mode: nil,
  random_delay: nil,
  max_load: nil,
  disable_ssl_certificate_check: nil,
- australian_proxy: false,
+ australian_proxy: nil,
  user_agent: nil)
  @timeout = timeout.nil? ? self.class.default_timeout : timeout
  @compliant_mode = compliant_mode.nil? ? self.class.default_compliant_mode : compliant_mode
  @random_delay = random_delay.nil? ? self.class.default_random_delay : random_delay
  @max_load = max_load.nil? ? self.class.default_max_load : max_load
- @max_load = [@max_load || 20.0, 33.0].min if @compliant_mode
+ @max_load = [@max_load || DEFAULT_MAX_LOAD, MAX_LOAD_CAP].min if @compliant_mode
  @user_agent = user_agent.nil? ? self.class.default_user_agent : user_agent

- @disable_ssl_certificate_check = disable_ssl_certificate_check.nil? ?
- self.class.default_disable_ssl_certificate_check :
+ @disable_ssl_certificate_check = if disable_ssl_certificate_check.nil?
+ self.class.default_disable_ssl_certificate_check
+ else
  disable_ssl_certificate_check
- @australian_proxy = australian_proxy.nil? ? self.class.default_australian_proxy : australian_proxy
+ end
+ @australian_proxy = if australian_proxy.nil?
+ self.class.default_australian_proxy
+ else
+ australian_proxy
+ end

  # Validate proxy URL format if proxy will be used
  @australian_proxy &&= !ScraperUtils.australian_proxy.to_s.empty?
  if @australian_proxy
  uri = begin
- URI.parse(ScraperUtils.australian_proxy.to_s)
- rescue URI::InvalidURIError => e
- raise URI::InvalidURIError, "Invalid proxy URL format: #{e.message}"
- end
+ URI.parse(ScraperUtils.australian_proxy.to_s)
+ rescue URI::InvalidURIError => e
+ raise URI::InvalidURIError, "Invalid proxy URL format: #{e.message}"
+ end
  unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
  raise URI::InvalidURIError, "Proxy URL must start with http:// or https://"
  end
@@ -135,7 +143,7 @@ module ScraperUtils
  end

  today = Date.today.strftime("%Y-%m-%d")
- @user_agent = ENV['MORPH_USER_AGENT']&.sub("TODAY", today)
+ @user_agent = ENV.fetch("MORPH_USER_AGENT", nil)&.sub("TODAY", today)
  if @compliant_mode
  version = ScraperUtils::VERSION
  @user_agent ||= "Mozilla/5.0 (compatible; ScraperUtils/#{version} #{today}; +https://github.com/ianheggie-oaf/scraper_utils)"
@@ -159,7 +167,8 @@ module ScraperUtils
  if @compliant_mode
  agent.user_agent = user_agent
  agent.request_headers ||= {}
- agent.request_headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
+ agent.request_headers["Accept"] =
+ "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
  agent.request_headers["Upgrade-Insecure-Requests"] = "1"
  end
  if @australian_proxy
@@ -178,32 +187,39 @@ module ScraperUtils
  def display_options
  display_args = []
  display_args << "timeout=#{@timeout}" if @timeout
- if @australian_proxy
- display_args << "australian_proxy=#{@australian_proxy.inspect}"
- elsif ScraperUtils.australian_proxy.to_s.empty?
- display_args << "#{ScraperUtils::AUSTRALIAN_PROXY_ENV_VAR} not set"
- else
- display_args << "australian_proxy=#{@australian_proxy.inspect}"
- end
+ display_args << if ScraperUtils.australian_proxy.to_s.empty? && !@australian_proxy
+ "#{ScraperUtils::AUSTRALIAN_PROXY_ENV_VAR} not set"
+ else
+ "australian_proxy=#{@australian_proxy.inspect}"
+ end
  display_args << "compliant_mode" if @compliant_mode
  display_args << "random_delay=#{@random_delay}" if @random_delay
  display_args << "max_load=#{@max_load}%" if @max_load
  display_args << "disable_ssl_certificate_check" if @disable_ssl_certificate_check
  display_args << "default args" if display_args.empty?
- ScraperUtils::FiberScheduler.log "Configuring Mechanize agent with #{display_args.join(', ')}"
+ ScraperUtils::FiberScheduler.log(
+ "Configuring Mechanize agent with #{display_args.join(', ')}"
+ )
  end

  def pre_connect_hook(_agent, request)
  @connection_started_at = Time.now
- ScraperUtils::FiberScheduler.log "Pre Connect request: #{request.inspect} at #{@connection_started_at}" if ENV["DEBUG"]
+ return unless DebugUtils.verbose?
+
+ ScraperUtils::FiberScheduler.log(
+ "Pre Connect request: #{request.inspect} at #{@connection_started_at}"
+ )
  end

  def post_connect_hook(_agent, uri, response, _body)
  raise ArgumentError, "URI must be present in post-connect hook" unless uri

  response_time = Time.now - @connection_started_at
- if ENV["DEBUG"]
- ScraperUtils::FiberScheduler.log "Post Connect uri: #{uri.inspect}, response: #{response.inspect} after #{response_time} seconds"
+ if DebugUtils.basic?
+ ScraperUtils::FiberScheduler.log(
+ "Post Connect uri: #{uri.inspect}, response: #{response.inspect} " \
+ "after #{response_time} seconds"
+ )
  end

  if @robots_checker&.disallowed?(uri)
@@ -214,18 +230,23 @@ module ScraperUtils
  delays = {
  robot_txt: @robots_checker&.crawl_delay&.round(3),
  max_load: @adaptive_delay&.next_delay(uri, response_time)&.round(3),
- random: (@min_random ? (rand(@min_random..@max_random) ** 2).round(3) : nil)
+ random: (@min_random ? (rand(@min_random..@max_random)**2).round(3) : nil)
  }
  @delay = delays.values.compact.max
  if @delay&.positive?
- puts "Delaying #{@delay} seconds, max of #{delays.inspect}" if ENV["DEBUG"]
- sleep(@delay)
+ $stderr.flush
+ ScraperUtils::FiberScheduler.log("Delaying #{@delay} seconds, max of #{delays.inspect}") if ENV["DEBUG"]
+ $stdout.flush
+ ScraperUtils::FiberScheduler.delay(@delay)
  end

  response
  end

  def verify_proxy_works(agent)
+ $stderr.flush
+ $stdout.flush
+ FiberScheduler.log "Checking proxy works..."
  my_ip = MechanizeUtils.public_ip(agent)
  begin
  IPAddr.new(my_ip)
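
The delay logic in the hunk above takes the largest of three candidate delays: the robots.txt `Crawl-delay`, the adaptive delay derived from the response time and `max_load`, and a squared random component. A minimal standalone sketch of that selection, with made-up numbers:

```ruby
# Illustrative values only; in the gem these come from RobotsChecker,
# AdaptiveDelay and the configured random delay range.
delays = {
  robot_txt: 2.0,
  max_load: 1.37,
  random: (rand(1.0..2.2)**2).round(3)
}
delay = delays.values.compact.max
sleep(delay) if delay&.positive? # the gem now hands this off to FiberScheduler.delay instead
```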
@@ -233,17 +254,17 @@ module ScraperUtils
  raise "Invalid public IP address returned by proxy check: #{my_ip.inspect}: #{e}"
  end
  ScraperUtils::FiberScheduler.log "Proxy is using IP address: #{my_ip.inspect}"
- my_headers = MechanizeUtils::public_headers(agent)
+ my_headers = MechanizeUtils.public_headers(agent)
  begin
  # Check response is JSON just to be safe!
  headers = JSON.parse(my_headers)
  puts "Proxy is passing headers:"
- puts JSON.pretty_generate(headers['headers'])
+ puts JSON.pretty_generate(headers["headers"])
  rescue JSON::ParserError => e
  puts "Couldn't parse public_headers: #{e}! Raw response:"
  puts my_headers.inspect
  end
- rescue Net::OpenTimeout, Timeout::Error => e
+ rescue Timeout::Error => e # Includes Net::OpenTimeout
  raise "Proxy check timed out: #{e}"
  rescue Errno::ECONNREFUSED, Net::HTTP::Persistent::Error => e
  raise "Failed to connect to proxy: #{e}"
@@ -45,20 +45,28 @@ module ScraperUtils
  #
  # @param agent [Mechanize, nil] Mechanize agent to use for IP lookup or nil when clearing cache
  # @param force [Boolean] Force a new IP lookup, by clearing cache first
- # @return [String] The public IP address
+ # @return [String, nil] The public IP address
  def self.public_ip(agent = nil, force: false)
  @public_ip = nil if force
- @public_ip ||= agent&.get(PUBLIC_IP_URL)&.body&.strip if agent
+ @public_ip ||= begin
+ response = agent&.get(PUBLIC_IP_URL)
+ response&.body&.strip
+ end
+ @public_ip
  end

  # Retrieves and logs the headers that make it through the proxy
  #
  # @param agent [Mechanize, nil] Mechanize agent to use for IP lookup or nil when clearing cache
  # @param force [Boolean] Force a new IP lookup, by clearing cache first
- # @return [String] The list of headers in json format
+ # @return [String, nil] The list of headers in json format
  def self.public_headers(agent = nil, force: false)
  @public_headers = nil if force
- @public_headers ||= agent&.get(HEADERS_ECHO_URL)&.body&.strip if agent
+ @public_headers ||= begin
+ response = agent&.get(HEADERS_ECHO_URL)
+ response&.body&.strip
+ end
+ @public_headers
  end
  end
  end
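
`public_ip` and `public_headers` now memoize their result and are documented to return `nil` when no agent is supplied. A hedged sketch of calling them; the returned IP string is of course environment-dependent:

```ruby
agent = Mechanize.new

ip = ScraperUtils::MechanizeUtils.public_ip(agent)               # e.g. "203.0.113.7"
headers_json = ScraperUtils::MechanizeUtils.public_headers(agent)

# Pass force: true to clear the cached value and query again.
ScraperUtils::MechanizeUtils.public_ip(agent, force: true)
```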
@@ -0,0 +1,34 @@
+ # frozen_string_literal: true
+
+ module ScraperUtils
+ # Provides utilities for randomizing processing order in scrapers,
+ # particularly helpful for distributing load and avoiding predictable patterns
+ module RandomizeUtils
+ # Returns a randomized version of the input collection when in production mode,
+ # or the original collection when in test/sequential mode
+ #
+ # @param collection [Array, Enumerable] Collection of items to potentially randomize
+ # @return [Array] Randomized or original collection depending on environment
+ def self.randomize_order(collection)
+ return collection.to_a if sequential?
+
+ collection.to_a.shuffle
+ end
+
+ # Checks if sequential processing is enabled
+ #
+ # @return [Boolean] true when in test mode or MORPH_PROCESS_SEQUENTIALLY is set
+ def self.sequential?
+ @sequential = !ENV["MORPH_PROCESS_SEQUENTIALLY"].to_s.empty? if @sequential.nil?
+ @sequential || false
+ end
+
+ # Explicitly set sequential mode for testing
+ #
+ # @param value [Boolean, nil] true to enable sequential mode, false to disable, nil to clear cache
+ # @return [Boolean, nil]
+ def self.sequential=(value)
+ @sequential = value
+ end
+ end
+ end
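
A usage sketch for the new `RandomizeUtils` module added above (the authority symbols are illustrative):

```ruby
authorities = %i[first second third]

# Shuffled in normal runs; returned unchanged when sequential mode is on.
ScraperUtils::RandomizeUtils.randomize_order(authorities)

# In tests, force deterministic order (or set MORPH_PROCESS_SEQUENTIALLY):
ScraperUtils::RandomizeUtils.sequential = true
ScraperUtils::RandomizeUtils.randomize_order(authorities) # => [:first, :second, :third]
```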
@@ -14,8 +14,10 @@ module ScraperUtils
  # * Crawl-delay from either User-agent: bot name or * (default)
  def initialize(user_agent)
  @user_agent = extract_user_agent(user_agent).downcase
- if ENV["DEBUG"]
- ScraperUtils::FiberScheduler.log "Checking robots.txt for user agent prefix: #{@user_agent} (case insensitive)"
+ if DebugUtils.basic?
+ ScraperUtils::FiberScheduler.log(
+ "Checking robots.txt for user agent prefix: #{@user_agent} (case insensitive)"
+ )
  end
  @rules = {} # domain -> {rules: [], delay: int}
  @delay = nil # Delay from last robots.txt check
@@ -73,7 +75,11 @@ module ScraperUtils
  @rules[domain] = rules
  rules
  rescue StandardError => e
- ScraperUtils::FiberScheduler.log "Warning: Failed to fetch robots.txt for #{domain}: #{e.message}" if ENV["DEBUG"]
+ if DebugUtils.basic?
+ ScraperUtils::FiberScheduler.log(
+ "WARNING: Failed to fetch robots.txt for #{domain}: #{e.message}"
+ )
+ end
  nil
  end
  end
@@ -141,4 +147,3 @@ module ScraperUtils
  end
  end
  end
-
@@ -1,5 +1,5 @@
  # frozen_string_literal: true

  module ScraperUtils
- VERSION = "0.2.0"
+ VERSION = "0.4.0"
  end
data/lib/scraper_utils.rb CHANGED
@@ -5,8 +5,11 @@ require "scraper_utils/authority_utils"
  require "scraper_utils/data_quality_monitor"
  require "scraper_utils/db_utils"
  require "scraper_utils/debug_utils"
+ require "scraper_utils/fiber_scheduler"
  require "scraper_utils/log_utils"
+ require "scraper_utils/mechanize_utils/agent_config"
  require "scraper_utils/mechanize_utils"
+ require "scraper_utils/randomize_utils"
  require "scraper_utils/robots_checker"
  require "scraper_utils/version"

@@ -15,9 +18,6 @@ module ScraperUtils
  # Constants for configuration on Morph.io
  AUSTRALIAN_PROXY_ENV_VAR = "MORPH_AUSTRALIAN_PROXY"

- # Enable debug locally, not on morph.io
- DEBUG_ENV_VAR = "DEBUG"
-
  # Fatal Error
  class Error < StandardError
  end
@@ -31,13 +31,6 @@ module ScraperUtils
  class UnprocessableRecord < Error
  end

- # Check if debug mode is enabled
- #
- # @return [Boolean] Whether debug mode is active
- def self.debug?
- !ENV[DEBUG_ENV_VAR].to_s.empty?
- end
-
  def self.australian_proxy
  ap = ENV[AUSTRALIAN_PROXY_ENV_VAR].to_s
  ap.empty? ? nil : ap
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: scraper_utils
  version: !ruby/object:Gem::Version
- version: 0.2.0
+ version: 0.4.0
  platform: ruby
  authors:
  - Ian Heggie
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2025-02-27 00:00:00.000000000 Z
+ date: 2025-03-03 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: mechanize
@@ -74,16 +74,21 @@ files:
  - SPECS.md
  - bin/console
  - bin/setup
+ - docs/example_scrape_with_fibers.rb
+ - docs/example_scraper.rb
  - lib/scraper_utils.rb
  - lib/scraper_utils/adaptive_delay.rb
  - lib/scraper_utils/authority_utils.rb
+ - lib/scraper_utils/cycle_utils.rb
  - lib/scraper_utils/data_quality_monitor.rb
+ - lib/scraper_utils/date_range_utils.rb
  - lib/scraper_utils/db_utils.rb
  - lib/scraper_utils/debug_utils.rb
  - lib/scraper_utils/fiber_scheduler.rb
  - lib/scraper_utils/log_utils.rb
  - lib/scraper_utils/mechanize_utils.rb
  - lib/scraper_utils/mechanize_utils/agent_config.rb
+ - lib/scraper_utils/randomize_utils.rb
  - lib/scraper_utils/robots_checker.rb
  - lib/scraper_utils/version.rb
  - scraper_utils.gemspec