snapcrawl 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 62a293da259afce5690315f27f2bbcd881e495a3d1b5344eb9ed9e2c46bd4a4d
4
- data.tar.gz: d600fdbcd2344e5a19f853cbea67a0d8ad0c365a38d00aa4de8d02dd6e52e5b0
3
+ metadata.gz: 1238ab663146a888fc002379efaae0abaa72f02fc9ef7954bffb79e7dad4b07d
4
+ data.tar.gz: c340cf0b5d1675158077257007f0d58883e9d3e0418f4c0551039fa2e09c6df8
5
5
  SHA512:
6
- metadata.gz: 3ebdb2355480bacd7f7a6faba264a31086e68c1864c692607fdb6fbc11df210eee17af936ab63305484ee46ac473d50b4033be11e995b51b9050b359c81dd906
7
- data.tar.gz: 42a0a9f048fe9b5b1b04426d444710a256ccc8e9a914e3277f062c4ebf760d50a018c1f189e7b0cebced1c236f5d13ca56ab4abbf808a5ec4812bf9a754a9343
6
+ metadata.gz: 5e1c2ca21bbfa5471d58fce0e20d61237a87e14157b156fec0e0ee0b07d8c2a169dd38bffda1c99292bc737fc5cf25c64811e2650de958756c29fd630ca87721
7
+ data.tar.gz: fd7d8e18393e00274f3c145c103e03c0572f7113fab98b7cf3572e99f77d16b88628be0b3050bf98fba311fb05db974453377cb96e8bbd567794426b2a83c099
data/README.md CHANGED
@@ -112,6 +112,14 @@ url_blacklist:
112
112
 
113
113
  # take a screenshot of this CSS selector only
114
114
  css_selector:
115
+
116
+ # when true, ignore SSL related errors
117
+ skip_ssl_verification: false
118
+
119
+ # set to any number of seconds to wait for the page to load before taking
120
+ # a screenshot, leave empty to not wait at all (only needed for pages with
121
+ # animations or other post-load events).
122
+ screenshot_delay:
115
123
  ```
116
124
 
117
125
  ## Contributing / Support
@@ -40,6 +40,8 @@ module Snapcrawl
40
40
  css_selector: nil,
41
41
  log_level: 1,
42
42
  log_color: 'auto',
43
+ skip_ssl_verification: false,
44
+ screenshot_delay: nil
43
45
  }
44
46
  end
45
47
 
@@ -52,7 +52,7 @@ module Snapcrawl
52
52
  end
53
53
 
54
54
  def http_response!
55
- response = cache.get(url) { HTTParty.get url }
55
+ response = cache.get(url) { HTTParty.get url, httparty_options }
56
56
 
57
57
  if !response.success?
58
58
  $logger.warn "http error on !undpur!#{url}!txtrst!, code: !txtylw!#{response.code}!txtrst!, message: #{response.message.strip}"
@@ -66,6 +66,10 @@ module Snapcrawl
66
66
 
67
67
  end
68
68
 
69
+ def httparty_options
70
+ Config.skip_ssl_verification ? { verify: false } : {}
71
+ end
72
+
69
73
  def normalize_links(links)
70
74
  result = []
71
75
 
@@ -12,27 +12,20 @@ module Snapcrawl
12
12
 
13
13
  def save(outfile = nil)
14
14
  outfile ||= "#{url.to_slug}.png"
15
-
16
- fetch_opts = { allowed_status_codes: [404, 401, 403] }
17
- if Config.selector
18
- fetch_opts[:selector] = Config.selector
19
- fetch_opts[:full] = false
20
- end
21
-
22
- webshot_capture url, outfile, fetch_opts
15
+ webshot_capture url, outfile
23
16
  end
24
17
 
25
18
  private
26
19
 
27
- def webshot_capture(url, image_path, fetch_opts)
28
- webshot_capture! url, image_path, fetch_opts
20
+ def webshot_capture(url, image_path)
21
+ webshot_capture! url, image_path
29
22
  rescue => e
30
23
  raise ScreenshotError, "#{e.class} #{e.message}"
31
24
  end
32
25
 
33
- def webshot_capture!(url, image_path, fetch_opts)
26
+ def webshot_capture!(url, image_path)
34
27
  hide_output do
35
- webshot.capture url, image_path, fetch_opts do |magick|
28
+ webshot.capture url, image_path, webshot_options do |magick|
36
29
  magick.combine_options do |c|
37
30
  c.background "white"
38
31
  c.gravity 'north'
@@ -43,6 +36,21 @@ module Snapcrawl
43
36
  end
44
37
  end
45
38
 
39
+ def webshot_options
40
+ result = { allowed_status_codes: [404, 401, 403] }
41
+
42
+ if Config.selector
43
+ result[:selector] = Config.selector
44
+ result[:full] = false
45
+ end
46
+
47
+ if Config.screenshot_delay
48
+ result[:timeout] = Config.screenshot_delay
49
+ end
50
+
51
+ result
52
+ end
53
+
46
54
  def webshot
47
55
  @webshot ||= Webshot::Screenshot.instance
48
56
  end
@@ -39,3 +39,11 @@ url_blacklist:
39
39
 
40
40
  # take a screenshot of this CSS selector only
41
41
  css_selector:
42
+
43
+ # when true, ignore SSL related errors
44
+ skip_ssl_verification: false
45
+
46
+ # set to any number of seconds to wait for the page to load before taking
47
+ # a screenshot, leave empty to not wait at all (only needed for pages with
48
+ # animations or other post-load events).
49
+ screenshot_delay:
@@ -1,3 +1,3 @@
1
1
  module Snapcrawl
2
- VERSION = "0.5.2"
2
+ VERSION = "0.5.3"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: snapcrawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.5.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Danny Ben Shitrit
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-02-25 00:00:00.000000000 Z
11
+ date: 2021-03-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: colsole