spidr 0.6.1 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/.editorconfig +11 -0
  3. data/.github/workflows/ruby.yml +26 -0
  4. data/.gitignore +4 -5
  5. data/ChangeLog.md +19 -1
  6. data/Gemfile +7 -4
  7. data/LICENSE.txt +1 -1
  8. data/README.md +136 -79
  9. data/Rakefile +1 -0
  10. data/gemspec.yml +7 -0
  11. data/lib/spidr/agent/actions.rb +3 -1
  12. data/lib/spidr/agent/events.rb +3 -1
  13. data/lib/spidr/agent/filters.rb +57 -56
  14. data/lib/spidr/agent/robots.rb +2 -0
  15. data/lib/spidr/agent/sanitizers.rb +7 -8
  16. data/lib/spidr/agent.rb +232 -108
  17. data/lib/spidr/auth_credential.rb +2 -0
  18. data/lib/spidr/auth_store.rb +9 -7
  19. data/lib/spidr/cookie_jar.rb +7 -5
  20. data/lib/spidr/extensions/uri.rb +3 -1
  21. data/lib/spidr/extensions.rb +3 -1
  22. data/lib/spidr/page/content_types.rb +53 -0
  23. data/lib/spidr/page/cookies.rb +2 -0
  24. data/lib/spidr/page/html.rb +21 -20
  25. data/lib/spidr/page/status_codes.rb +15 -11
  26. data/lib/spidr/page.rb +3 -1
  27. data/lib/spidr/proxy.rb +8 -14
  28. data/lib/spidr/rules.rb +7 -8
  29. data/lib/spidr/session_cache.rb +26 -22
  30. data/lib/spidr/settings/proxy.rb +22 -6
  31. data/lib/spidr/settings/timeouts.rb +2 -0
  32. data/lib/spidr/settings/user_agent.rb +2 -0
  33. data/lib/spidr/settings.rb +5 -3
  34. data/lib/spidr/spidr.rb +22 -11
  35. data/lib/spidr/version.rb +3 -1
  36. data/lib/spidr.rb +5 -3
  37. data/spec/agent_spec.rb +356 -7
  38. data/spec/example_page.rb +2 -0
  39. data/spec/page/content_types_spec.rb +22 -0
  40. data/spec/page/html_spec.rb +255 -51
  41. data/spec/page/status_codes_spec.rb +4 -4
  42. data/spec/proxy_spec.rb +2 -2
  43. data/spec/settings/proxy_examples.rb +31 -11
  44. data/spec/spec_helper.rb +3 -0
  45. data/spidr.gemspec +1 -4
  46. metadata +8 -7
  47. data/.travis.yml +0 -16
data/lib/spidr/agent.rb CHANGED
@@ -1,14 +1,16 @@
1
- require 'spidr/settings/user_agent'
2
- require 'spidr/agent/sanitizers'
3
- require 'spidr/agent/filters'
4
- require 'spidr/agent/events'
5
- require 'spidr/agent/actions'
6
- require 'spidr/agent/robots'
7
- require 'spidr/page'
8
- require 'spidr/session_cache'
9
- require 'spidr/cookie_jar'
10
- require 'spidr/auth_store'
11
- require 'spidr/spidr'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'settings/user_agent'
4
+ require_relative 'agent/sanitizers'
5
+ require_relative 'agent/filters'
6
+ require_relative 'agent/events'
7
+ require_relative 'agent/actions'
8
+ require_relative 'agent/robots'
9
+ require_relative 'page'
10
+ require_relative 'session_cache'
11
+ require_relative 'cookie_jar'
12
+ require_relative 'auth_store'
13
+ require_relative 'spidr'
12
14
 
13
15
  require 'openssl'
14
16
  require 'net/http'
@@ -19,12 +21,12 @@ module Spidr
19
21
 
20
22
  include Settings::UserAgent
21
23
 
22
- # HTTP Host Header to use
24
+ # HTTP `Host` Header to use
23
25
  #
24
26
  # @return [String]
25
27
  attr_accessor :host_header
26
28
 
27
- # HTTP Host Headers to use for specific hosts
29
+ # HTTP `Host` Headers to use for specific hosts
28
30
  #
29
31
  # @return [Hash{String,Regexp => String}]
30
32
  attr_reader :host_headers
@@ -96,70 +98,110 @@ module Spidr
96
98
  #
97
99
  # Creates a new Agent object.
98
100
  #
99
- # @param [Hash] options
100
- # Additional options
101
+ # @param [String, nil] host_header
102
+ # The HTTP `Host` header to use with each request.
101
103
  #
102
- # @option options [Integer] :open_timeout (Spidr.open_timeout)
103
- # Optional open timeout.
104
+ # @param [Hash{String,Regexp => String}] host_headers
105
+ # The HTTP `Host` headers to use for specific hosts.
104
106
  #
105
- # @option options [Integer] :read_timeout (Spidr.read_timeout)
107
+ # @param [Hash{String => String}] default_headers
108
+ # Default headers to set for every request.
109
+ #
110
+ # @param [String, nil] user_agent
111
+ # The `User-Agent` string to send with each request.
112
+ #
113
+ # @param [String, nil] referer
114
+ # The `Referer` URL to send with each request.
115
+ #
116
+ # @param [Integer, nil] open_timeout
117
+ # Optional open connection timeout.
118
+ #
119
+ # @param [Integer, nil] read_timeout
106
120
  # Optional read timeout.
107
121
  #
108
- # @option options [Integer] :ssl_timeout (Spidr.ssl_timeout)
109
- # Optional ssl timeout.
122
+ # @param [Integer, nil] ssl_timeout
123
+ # Optional SSL connection timeout.
110
124
  #
111
- # @option options [Integer] :continue_timeout (Spidr.continue_timeout)
125
+ # @param [Integer, nil] continue_timeout
112
126
  # Optional continue timeout.
113
127
  #
114
- # @option options [Integer] :keep_alive_timeout (Spidr.keep_alive_timeout)
115
- # Optional keep_alive timeout.
128
+ # @param [Integer, nil] keep_alive_timeout
129
+ # Optional `Keep-Alive` timeout.
116
130
  #
117
- # @option options [Hash] :proxy (Spidr.proxy)
131
+ # @param [Spidr::Proxy, Hash, URI::HTTP, String, nil] proxy
118
132
  # The proxy information to use.
119
133
  #
120
- # @option :proxy [String] :host
134
+ # @option proxy [String] :host
121
135
  # The host the proxy is running on.
122
136
  #
123
- # @option :proxy [Integer] :port
137
+ # @option proxy [Integer] :port (8080)
124
138
  # The port the proxy is running on.
125
139
  #
126
- # @option :proxy [String] :user
140
+ # @option proxy [String, nil] :user
127
141
  # The user to authenticate as with the proxy.
128
142
  #
129
- # @option :proxy [String] :password
143
+ # @option proxy [String, nil] :password
130
144
  # The password to authenticate with.
131
145
  #
132
- # @option options [Hash{String => String}] :default_headers
133
- # Default headers to set for every request.
146
+ # @param [Integer] delay
147
+ # The number of seconds to pause between each request.
134
148
  #
135
- # @option options [String] :host_header
136
- # The HTTP Host header to use with each request.
149
+ # @param [Integer, nil] limit
150
+ # The maximum number of pages to visit.
137
151
  #
138
- # @option options [Hash{String,Regexp => String}] :host_headers
139
- # The HTTP Host headers to use for specific hosts.
152
+ # @param [Integer, nil] max_depth
153
+ # The maximum link depth to follow.
140
154
  #
141
- # @option options [String] :user_agent (Spidr.user_agent)
142
- # The User-Agent string to send with each requests.
155
+ # @param [Set, Array, nil] queue
156
+ # The initial queue of URLs to visit.
143
157
  #
144
- # @option options [String] :referer
145
- # The Referer URL to send with each request.
158
+ # @param [Set, Array, nil] history
159
+ # The initial list of visited URLs.
146
160
  #
147
- # @option options [Integer] :delay (0)
148
- # The number of seconds to pause between each request.
161
+ # @param [Boolean] strip_fragments
162
+ # Controls whether to strip the fragment components from the URLs.
149
163
  #
150
- # @option options [Set, Array] :queue
151
- # The initial queue of URLs to visit.
164
+ # @param [Boolean] strip_query
165
+ # Controls whether to strip the query components from the URLs.
152
166
  #
153
- # @option options [Set, Array] :history
154
- # The initial list of visited URLs.
167
+ # @param [Array<String>] schemes
168
+ # The list of acceptable URI schemes to visit.
169
+ # The `https` scheme will be ignored if `net/https` cannot be loaded.
155
170
  #
156
- # @option options [Integer] :limit
157
- # The maximum number of pages to visit.
171
+ # @param [String] host
172
+ # The host-name to visit.
158
173
  #
159
- # @option options [Integer] :max_depth
160
- # The maximum link depth to follow.
174
+ # @param [Array<String, Regexp, Proc>] hosts
175
+ # The patterns which match the host-names to visit.
161
176
  #
162
- # @option options [Boolean] :robots (Spidr.robots?)
177
+ # @param [Array<String, Regexp, Proc>] ignore_hosts
178
+ # The patterns which match the host-names to not visit.
179
+ #
180
+ # @param [Array<Integer, Regexp, Proc>] ports
181
+ # The patterns which match the ports to visit.
182
+ #
183
+ # @param [Array<Integer, Regexp, Proc>] ignore_ports
184
+ # The patterns which match the ports to not visit.
185
+ #
186
+ # @param [Array<String, Regexp, Proc>] links
187
+ # The patterns which match the links to visit.
188
+ #
189
+ # @param [Array<String, Regexp, Proc>] ignore_links
190
+ # The patterns which match the links to not visit.
191
+ #
192
+ # @param [Array<String, Regexp, Proc>] urls
193
+ # The patterns which match the URLs to visit.
194
+ #
195
+ # @param [Array<String, Regexp, Proc>] ignore_urls
196
+ # The patterns which match the URLs to not visit.
197
+ #
198
+ # @param [Array<String, Regexp, Proc>] exts
199
+ # The patterns which match the URI path extensions to visit.
200
+ #
201
+ # @param [Array<String, Regexp, Proc>] ignore_exts
202
+ # The patterns which match the URI path extensions to not visit.
203
+ #
204
+ # @param [Boolean] robots
163
205
  # Specifies whether `robots.txt` should be honored.
164
206
  #
165
207
  # @yield [agent]
@@ -169,58 +211,99 @@ module Spidr
169
211
  # @yieldparam [Agent] agent
170
212
  # The newly created agent.
171
213
  #
172
- # @see #initialize_sanitizers
173
- # @see #initialize_filters
174
- # @see #initialize_actions
175
- # @see #initialize_events
176
- #
177
- def initialize(options={})
178
- @host_header = options[:host_header]
179
- @host_headers = {}
180
-
181
- if options[:host_headers]
182
- @host_headers.merge!(options[:host_headers])
183
- end
184
-
185
- @default_headers = {}
186
-
187
- if options[:default_headers]
188
- @default_headers.merge!(options[:default_headers])
189
- end
190
-
191
- @user_agent = options.fetch(:user_agent,Spidr.user_agent)
192
- @referer = options[:referer]
193
-
194
- @sessions = SessionCache.new(options)
214
+ def initialize(# header keyword arguments
215
+ host_header: nil,
216
+ host_headers: {},
217
+ default_headers: {},
218
+ user_agent: Spidr.user_agent,
219
+ referer: nil,
220
+ # session cache keyword arguments
221
+ proxy: Spidr.proxy,
222
+ open_timeout: Spidr.open_timeout,
223
+ ssl_timeout: Spidr.ssl_timeout,
224
+ read_timeout: Spidr.read_timeout,
225
+ continue_timeout: Spidr.continue_timeout,
226
+ keep_alive_timeout: Spidr.keep_alive_timeout,
227
+ # spidering controls keyword arguments
228
+ delay: 0,
229
+ limit: nil,
230
+ max_depth: nil,
231
+ # history keyword arguments
232
+ queue: nil,
233
+ history: nil,
234
+ # sanitizer keyword arguments
235
+ strip_fragments: true,
236
+ strip_query: false,
237
+ # filtering keyword arguments
238
+ schemes: self.class.default_schemes,
239
+ host: nil,
240
+ hosts: nil,
241
+ ignore_hosts: nil,
242
+ ports: nil,
243
+ ignore_ports: nil,
244
+ links: nil,
245
+ ignore_links: nil,
246
+ urls: nil,
247
+ ignore_urls: nil,
248
+ exts: nil,
249
+ ignore_exts: nil,
250
+ # robots keyword arguments
251
+ robots: Spidr.robots?)
252
+ @host_header = host_header
253
+ @host_headers = host_headers
254
+
255
+ @default_headers = default_headers
256
+
257
+ @user_agent = user_agent
258
+ @referer = referer
259
+
260
+ @sessions = SessionCache.new(
261
+ proxy: proxy,
262
+ open_timeout: open_timeout,
263
+ ssl_timeout: ssl_timeout,
264
+ read_timeout: read_timeout,
265
+ continue_timeout: continue_timeout,
266
+ keep_alive_timeout: keep_alive_timeout
267
+ )
195
268
  @cookies = CookieJar.new
196
269
  @authorized = AuthStore.new
197
270
 
198
271
  @running = false
199
- @delay = options.fetch(:delay,0)
272
+ @delay = delay
200
273
  @history = Set[]
201
274
  @failures = Set[]
202
275
  @queue = []
203
276
 
204
- @limit = options[:limit]
277
+ @limit = limit
205
278
  @levels = Hash.new(0)
206
- @max_depth = options[:max_depth]
207
-
208
- if options[:queue]
209
- self.queue = options[:queue]
210
- end
211
-
212
- if options[:history]
213
- self.history = options[:history]
214
- end
215
-
216
- initialize_sanitizers(options)
217
- initialize_filters(options)
218
- initialize_actions(options)
219
- initialize_events(options)
220
-
221
- if options.fetch(:robots,Spidr.robots?)
222
- initialize_robots
223
- end
279
+ @max_depth = max_depth
280
+
281
+ self.queue = queue if queue
282
+ self.history = history if history
283
+
284
+ initialize_sanitizers(
285
+ strip_fragments: strip_fragments,
286
+ strip_query: strip_query
287
+ )
288
+
289
+ initialize_filters(
290
+ schemes: schemes,
291
+ host: host,
292
+ hosts: hosts,
293
+ ignore_hosts: ignore_hosts,
294
+ ports: ports,
295
+ ignore_ports: ignore_ports,
296
+ links: links,
297
+ ignore_links: ignore_links,
298
+ urls: urls,
299
+ ignore_urls: ignore_urls,
300
+ exts: exts,
301
+ ignore_exts: ignore_exts
302
+ )
303
+ initialize_actions
304
+ initialize_events
305
+
306
+ initialize_robots if robots
224
307
 
225
308
  yield self if block_given?
226
309
  end
@@ -231,8 +314,8 @@ module Spidr
231
314
  # @param [URI::HTTP, String] url
232
315
  # The URL to start spidering at.
233
316
  #
234
- # @param [Hash] options
235
- # Additional options. See {Agent#initialize}.
317
+ # @param [Hash{Symbol => Object}] kwargs
318
+ # Additional keyword arguments. See {Agent#initialize}.
236
319
  #
237
320
  # @yield [agent]
238
321
  # If a block is given, it will be passed the newly created agent
@@ -241,12 +324,16 @@ module Spidr
241
324
  # @yieldparam [Agent] agent
242
325
  # The newly created agent.
243
326
  #
327
+ # @return [Agent]
328
+ # The created agent object.
329
+ #
244
330
  # @see #initialize
245
331
  # @see #start_at
246
332
  #
247
- def self.start_at(url,options={},&block)
248
- agent = new(options,&block)
333
+ def self.start_at(url,**kwargs,&block)
334
+ agent = new(**kwargs,&block)
249
335
  agent.start_at(url)
336
+ return agent
250
337
  end
251
338
 
252
339
  #
@@ -255,8 +342,8 @@ module Spidr
255
342
  # @param [URI::HTTP, String] url
256
343
  # The web-site to spider.
257
344
  #
258
- # @param [Hash] options
259
- # Additional options. See {Agent#initialize}.
345
+ # @param [Hash{Symbol => Object}] kwargs
346
+ # Additional keyword arguments. See {Agent#initialize}.
260
347
  #
261
348
  # @yield [agent]
262
349
  # If a block is given, it will be passed the newly created agent
@@ -265,13 +352,17 @@ module Spidr
265
352
  # @yieldparam [Agent] agent
266
353
  # The newly created agent.
267
354
  #
355
+ # @return [Agent]
356
+ # The created agent object.
357
+ #
268
358
  # @see #initialize
269
359
  #
270
- def self.site(url,options={},&block)
360
+ def self.site(url,**kwargs,&block)
271
361
  url = URI(url)
272
362
 
273
- agent = new(options.merge(host: url.host),&block)
363
+ agent = new(host: url.host, **kwargs, &block)
274
364
  agent.start_at(url)
365
+ return agent
275
366
  end
276
367
 
277
368
  #
@@ -280,8 +371,35 @@ module Spidr
280
371
  # @param [String] name
281
372
  # The host-name to spider.
282
373
  #
283
- # @param [Hash] options
284
- # Additional options. See {Agent#initialize}.
374
+ # @param [Hash{Symbol => Object}] kwargs
375
+ # Additional keyword arguments. See {Agent#initialize}.
376
+ #
377
+ # @yield [agent]
378
+ # If a block is given, it will be passed the newly created agent
379
+ # before it begins spidering.
380
+ #
381
+ # @yieldparam [Agent] agent
382
+ # The newly created agent.
383
+ #
384
+ # @return [Agent]
385
+ # The created agent object.
386
+ #
387
+ # @see #initialize
388
+ #
389
+ def self.host(name,**kwargs,&block)
390
+ agent = new(host: name, **kwargs, &block)
391
+ agent.start_at(URI::HTTP.build(host: name, path: '/'))
392
+ return agent
393
+ end
394
+
395
+ #
396
+ # Creates a new agent and spiders the entire domain.
397
+ #
398
+ # @param [String] name
399
+ # The top-level domain to spider.
400
+ #
401
+ # @param [Hash{Symbol => Object}] kwargs
402
+ # Additional keyword arguments. See {Agent#initialize}.
285
403
  #
286
404
  # @yield [agent]
287
405
  # If a block is given, it will be passed the newly created agent
@@ -290,11 +408,17 @@ module Spidr
290
408
  # @yieldparam [Agent] agent
291
409
  # The newly created agent.
292
410
  #
411
+ # @return [Agent]
412
+ # The created agent object.
413
+ #
293
414
  # @see #initialize
294
415
  #
295
- def self.host(name,options={},&block)
296
- agent = new(options.merge(host: name),&block)
416
+ # @since 0.7.0
417
+ #
418
+ def self.domain(name,**kwargs,&block)
419
+ agent = new(host: /(^|\.)#{Regexp.escape(name)}$/, **kwargs, &block)
297
420
  agent.start_at(URI::HTTP.build(host: name, path: '/'))
421
+ return agent
298
422
  end
299
423
 
300
424
  #
@@ -314,10 +438,10 @@ module Spidr
314
438
  #
315
439
  # Sets the proxy information that the agent uses.
316
440
  #
317
- # @param [Proxy] new_proxy
441
+ # @param [Proxy, Hash, URI::HTTP, String, nil] new_proxy
318
442
  # The new proxy information.
319
443
  #
320
- # @return [Hash]
444
+ # @return [Proxy]
321
445
  # The new proxy information.
322
446
  #
323
447
  # @see SessionCache#proxy=
@@ -534,7 +658,7 @@ module Spidr
534
658
  def enqueue(url,level=0)
535
659
  url = sanitize_url(url)
536
660
 
537
- if (!(queued?(url)) && visit?(url))
661
+ if (!queued?(url) && visit?(url))
538
662
  link = url.to_s
539
663
 
540
664
  begin
@@ -633,7 +757,7 @@ module Spidr
633
757
  end
634
758
 
635
759
  #
636
- # Visits a given URL, and enqueus the links recovered from the URL
760
+ # Visits a given URL, and enqueues the links recovered from the URL
637
761
  # to be visited later.
638
762
  #
639
763
  # @param [URI::HTTP, String] url
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  #
3
5
  # Represents HTTP Authentication credentials for a website.
@@ -1,6 +1,8 @@
1
- require 'spidr/extensions/uri'
2
- require 'spidr/auth_credential'
3
- require 'spidr/page'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'extensions/uri'
4
+ require_relative 'auth_credential'
5
+ require_relative 'page'
4
6
 
5
7
  require 'base64'
6
8
 
@@ -20,7 +22,7 @@ module Spidr
20
22
  @credentials = {}
21
23
  end
22
24
 
23
- #
25
+ #
24
26
  # Given a URL, return the most specific matching auth credential.
25
27
  #
26
28
  # @param [URI] url
@@ -54,7 +56,7 @@ module Spidr
54
56
  return nil
55
57
  end
56
58
 
57
- #
59
+ #
58
60
  # Add an auth credential to the store for supplied base URL.
59
61
  #
60
62
  # @param [URI] url
@@ -109,7 +111,7 @@ module Spidr
109
111
  # or `nil` if no authorization exists.
110
112
  #
111
113
  # @param [URI] url
112
- # The url.
114
+ # The URL.
113
115
  #
114
116
  # @return [String, nil]
115
117
  # The base64 encoded authorization string or `nil`.
@@ -122,7 +124,7 @@ module Spidr
122
124
  end
123
125
  end
124
126
 
125
- #
127
+ #
126
128
  # Clear the contents of the auth store.
127
129
  #
128
130
  # @return [AuthStore]
@@ -1,4 +1,6 @@
1
- require 'spidr/page'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'page'
2
4
 
3
5
  require 'set'
4
6
 
@@ -42,8 +44,8 @@ module Spidr
42
44
  @params.each(&block)
43
45
  end
44
46
 
45
- #
46
- # Return all relevant cookies in a single string for the
47
+ #
48
+ # Return all relevant cookies in a single string for the
47
49
  # named host or domain (in browser request format).
48
50
  #
49
51
  # @param [String] host
@@ -59,7 +61,7 @@ module Spidr
59
61
  @params[host] ||= {}
60
62
  end
61
63
 
62
- #
64
+ #
63
65
  # Add a cookie to the jar for a particular domain.
64
66
  #
65
67
  # @param [String] host
@@ -166,7 +168,7 @@ module Spidr
166
168
  return host_cookies
167
169
  end
168
170
 
169
- #
171
+ #
170
172
  # Clear out the jar, removing all stored cookies.
171
173
  #
172
174
  # @since 0.2.2
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'uri'
2
4
  require 'strscan'
3
5
 
@@ -58,7 +60,7 @@ module URI
58
60
  unless stack.empty?
59
61
  "#{leading_slash}#{stack.join('/')}#{trailing_slash}"
60
62
  else
61
- '/'
63
+ String.new('/')
62
64
  end
63
65
  end
64
66
  end
@@ -1 +1,3 @@
1
- require 'spidr/extensions/uri'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'extensions/uri'
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  class Page
3
5
  #
@@ -221,5 +223,56 @@ module Spidr
221
223
  def zip?
222
224
  is_content_type?('application/zip')
223
225
  end
226
+
227
+ #
228
+ # Determines if the page is a PNG image.
229
+ #
230
+ # @return [Boolean]
231
+ # Specifies whether the page is a PNG image.
232
+ #
233
+ # @since 0.7.0
234
+ #
235
+ def png?
236
+ is_content_type?('image/png')
237
+ end
238
+
239
+ #
240
+ # Determines if the page is a GIF image.
241
+ #
242
+ # @return [Boolean]
243
+ # Specifies whether the page is a GIF image.
244
+ #
245
+ # @since 0.7.0
246
+ #
247
+ def gif?
248
+ is_content_type?('image/gif')
249
+ end
250
+
251
+ #
252
+ # Determines if the page is a JPEG image.
253
+ #
254
+ # @return [Boolean]
255
+ # Specifies whether the page is a JPEG image.
256
+ #
257
+ # @since 0.7.0
258
+ #
259
+ def jpeg?
260
+ is_content_type?('image/jpeg')
261
+ end
262
+
263
+ #
264
+ # Determines if the page is an ICO image.
265
+ #
266
+ # @return [Boolean]
267
+ # Specifies whether the page is an ICO image.
268
+ #
269
+ # @since 0.7.0
270
+ #
271
+ def ico?
272
+ is_content_type?('image/x-icon') ||
273
+ is_content_type?('image/vnd.microsoft.icon')
274
+ end
275
+
276
+ alias icon? ico?
224
277
  end
225
278
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'set'
2
4
 
3
5
  module Spidr