powerdlz23 1.2.4 → 1.2.5

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/grell/.rspec +2 -0
  2. package/grell/.travis.yml +28 -0
  3. package/grell/CHANGELOG.md +111 -0
  4. package/grell/Gemfile +7 -0
  5. package/grell/LICENSE.txt +22 -0
  6. package/grell/README.md +213 -0
  7. package/grell/Rakefile +2 -0
  8. package/grell/grell.gemspec +36 -0
  9. package/grell/lib/grell/capybara_driver.rb +44 -0
  10. package/grell/lib/grell/crawler.rb +83 -0
  11. package/grell/lib/grell/crawler_manager.rb +84 -0
  12. package/grell/lib/grell/grell_logger.rb +10 -0
  13. package/grell/lib/grell/page.rb +275 -0
  14. package/grell/lib/grell/page_collection.rb +62 -0
  15. package/grell/lib/grell/rawpage.rb +62 -0
  16. package/grell/lib/grell/reader.rb +18 -0
  17. package/grell/lib/grell/version.rb +3 -0
  18. package/grell/lib/grell.rb +11 -0
  19. package/grell/spec/lib/capybara_driver_spec.rb +38 -0
  20. package/grell/spec/lib/crawler_manager_spec.rb +174 -0
  21. package/grell/spec/lib/crawler_spec.rb +361 -0
  22. package/grell/spec/lib/page_collection_spec.rb +159 -0
  23. package/grell/spec/lib/page_spec.rb +418 -0
  24. package/grell/spec/lib/reader_spec.rb +43 -0
  25. package/grell/spec/spec_helper.rb +66 -0
  26. package/heartmagic/config.py +1 -0
  27. package/heartmagic/heart.py +3 -0
  28. package/heartmagic/pytransform/__init__.py +483 -0
  29. package/heartmagic/pytransform/_pytransform.dll +0 -0
  30. package/heartmagic/pytransform/_pytransform.so +0 -0
  31. package/httpStatusCode/README.md +2 -0
  32. package/httpStatusCode/httpStatusCode.js +4 -0
  33. package/httpStatusCode/reasonPhrases.js +344 -0
  34. package/httpStatusCode/statusCodes.js +344 -0
  35. package/package.json +1 -1
  36. package/snapcrawl/.changelog.old.md +157 -0
  37. package/snapcrawl/.gitattributes +1 -0
  38. package/snapcrawl/.github/workflows/test.yml +41 -0
  39. package/snapcrawl/.rspec +3 -0
  40. package/snapcrawl/.rubocop.yml +23 -0
  41. package/snapcrawl/CHANGELOG.md +182 -0
  42. package/snapcrawl/Gemfile +15 -0
  43. package/snapcrawl/LICENSE +21 -0
  44. package/snapcrawl/README.md +135 -0
  45. package/snapcrawl/Runfile +35 -0
  46. package/snapcrawl/bin/snapcrawl +25 -0
  47. package/snapcrawl/lib/snapcrawl/cli.rb +52 -0
  48. package/snapcrawl/lib/snapcrawl/config.rb +60 -0
  49. package/snapcrawl/lib/snapcrawl/crawler.rb +98 -0
  50. package/snapcrawl/lib/snapcrawl/dependencies.rb +21 -0
  51. package/snapcrawl/lib/snapcrawl/exceptions.rb +5 -0
  52. package/snapcrawl/lib/snapcrawl/log_helpers.rb +36 -0
  53. package/snapcrawl/lib/snapcrawl/page.rb +118 -0
  54. package/snapcrawl/lib/snapcrawl/pretty_logger.rb +11 -0
  55. package/snapcrawl/lib/snapcrawl/refinements/pair_split.rb +26 -0
  56. package/snapcrawl/lib/snapcrawl/refinements/string_refinements.rb +13 -0
  57. package/snapcrawl/lib/snapcrawl/screenshot.rb +73 -0
  58. package/snapcrawl/lib/snapcrawl/templates/config.yml +49 -0
  59. package/snapcrawl/lib/snapcrawl/templates/docopt.txt +26 -0
  60. package/snapcrawl/lib/snapcrawl/version.rb +3 -0
  61. package/snapcrawl/lib/snapcrawl.rb +20 -0
  62. package/snapcrawl/snapcrawl.gemspec +27 -0
  63. package/snapcrawl/snapcrawl.yml +41 -0
  64. package/snapcrawl/spec/README.md +16 -0
  65. package/snapcrawl/spec/approvals/bin/help +26 -0
  66. package/snapcrawl/spec/approvals/bin/usage +4 -0
  67. package/snapcrawl/spec/approvals/cli/usage +4 -0
  68. package/snapcrawl/spec/approvals/config/defaults +15 -0
  69. package/snapcrawl/spec/approvals/config/minimal +15 -0
  70. package/snapcrawl/spec/approvals/integration/blacklist +14 -0
  71. package/snapcrawl/spec/approvals/integration/default-config +14 -0
  72. package/snapcrawl/spec/approvals/integration/depth-0 +6 -0
  73. package/snapcrawl/spec/approvals/integration/depth-3 +6 -0
  74. package/snapcrawl/spec/approvals/integration/log-color-no +6 -0
  75. package/snapcrawl/spec/approvals/integration/screenshot-error +3 -0
  76. package/snapcrawl/spec/approvals/integration/whitelist +14 -0
  77. package/snapcrawl/spec/approvals/models/pretty_logger/colors +1 -0
  78. package/snapcrawl/spec/fixtures/config/minimal.yml +4 -0
  79. package/snapcrawl/spec/server/config.ru +97 -0
  80. package/snapcrawl/spec/snapcrawl/bin_spec.rb +15 -0
  81. package/snapcrawl/spec/snapcrawl/cli_spec.rb +9 -0
  82. package/snapcrawl/spec/snapcrawl/config_spec.rb +26 -0
  83. package/snapcrawl/spec/snapcrawl/integration_spec.rb +65 -0
  84. package/snapcrawl/spec/snapcrawl/page_spec.rb +89 -0
  85. package/snapcrawl/spec/snapcrawl/pretty_logger_spec.rb +19 -0
  86. package/snapcrawl/spec/snapcrawl/refinements/pair_split_spec.rb +27 -0
  87. package/snapcrawl/spec/snapcrawl/refinements/string_refinements_spec.rb +29 -0
  88. package/snapcrawl/spec/snapcrawl/screenshot_spec.rb +62 -0
  89. package/snapcrawl/spec/spec_helper.rb +22 -0
  90. package/snapcrawl/spec/spec_mixin.rb +10 -0

package/snapcrawl/lib/snapcrawl/page.rb
@@ -0,0 +1,118 @@
+ require 'addressable/uri'
+ require 'fileutils'
+ require 'httparty'
+ require 'lightly'
+ require 'nokogiri'
+
+ module Snapcrawl
+   class Page
+     using StringRefinements
+
+     attr_reader :url, :depth
+
+     EXTENSION_BLACKLIST = 'png|gif|jpg|pdf|zip'
+     PROTOCOL_BLACKLIST = 'mailto|tel'
+
+     def initialize(url, depth: 0)
+       @url = url.protocolize
+       @depth = depth
+     end
+
+     def valid?
+       http_response&.success?
+     end
+
+     def site
+       @site ||= Addressable::URI.parse(url).site
+     end
+
+     def path
+       @path ||= Addressable::URI.parse(url).request_uri
+     end
+
+     def links
+       return nil unless valid?
+
+       doc = Nokogiri::HTML http_response.body
+       normalize_links doc.css('a')
+     end
+
+     def pages
+       return nil unless valid?
+
+       links.map { |link| Page.new link, depth: depth + 1 }
+     end
+
+     def save_screenshot(outfile)
+       return false unless valid?
+
+       Screenshot.new(url).save outfile
+     end
+
+     private
+
+     def http_response
+       @http_response ||= http_response!
+     end
+
+     def http_response!
+       response = cache.get(url) { HTTParty.get url, httparty_options }
+
+       unless response.success?
+         $logger.warn "http error on mu`#{url}`, code: y`#{response.code}`, message: #{response.message.strip}"
+       end
+
+       response
+     rescue => e
+       $logger.error "http error on mu`#{url}` - r`#{e.class}`: #{e.message}"
+       nil
+     end
+
+     def httparty_options
+       Config.skip_ssl_verification ? { verify: false } : {}
+     end
+
+     def normalize_links(links)
+       result = []
+
+       links.each do |link|
+         valid_link = normalize_link link
+         result << valid_link if valid_link
+       end
+
+       result.uniq
+     end
+
+     def normalize_link(link)
+       link = link.attribute('href').to_s.dup
+
+       # Remove #hash
+       link.gsub!(/#.+$/, '')
+       return nil if link.empty?
+
+       # Remove links to specific extensions and protocols
+       return nil if /\.(#{EXTENSION_BLACKLIST})(\?.*)?$/o.match?(link)
+       return nil if /^(#{PROTOCOL_BLACKLIST}):/o.match?(link)
+
+       # Strip spaces
+       link.strip!
+
+       # Convert relative links to absolute
+       begin
+         link = Addressable::URI.join(url, link).to_s.dup
+       rescue => e
+         $logger.warn "r`#{e.class}`: #{e.message} on #{path} (link: #{link})"
+         return nil
+       end
+
+       # Keep only links in our base domain
+       return nil unless link.include? site
+
+       link
+     end
+
+     def cache
+       Lightly.new life: Config.cache_life
+     end
+   end
+ end
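
A minimal usage sketch for the Page class above, assuming the gem has been required (which loads Config and sets the global $logger, as lib/snapcrawl.rb does later in this diff); the URL is illustrative:

```ruby
require 'snapcrawl'

# Page#initialize protocolizes the URL, so a bare hostname works
page = Snapcrawl::Page.new 'example.com'

if page.valid?                          # true when the HTTP GET succeeded
  page.links.each { |link| puts link }  # absolute, same-site links only
  page.pages.each do |child|            # child Page objects carry depth + 1
    puts "#{child.url} (depth #{child.depth})"
  end
  page.save_screenshot 'example.png'    # delegates to Screenshot#save
end
```

Note that the HTTP response is memoized per instance and cached on disk via Lightly for Config.cache_life seconds, so re-crawls within the cache window avoid re-fetching.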

package/snapcrawl/lib/snapcrawl/pretty_logger.rb
@@ -0,0 +1,11 @@
+ require 'logger'
+
+ module Snapcrawl
+   class PrettyLogger
+     extend LogHelpers
+
+     def self.new
+       Logger.new($stdout, formatter: log_formatter, level: Config.log_level)
+     end
+   end
+ end
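
Note the design choice here: PrettyLogger.new overrides class-level new to return a plain stdlib Logger wired to the gem's formatter, so callers get the standard Logger interface. A small sketch (the message is illustrative):

```ruby
require 'snapcrawl'

logger = Snapcrawl::PrettyLogger.new  # returns a ::Logger, not a PrettyLogger
logger.info 'crawl started'           # formatted by LogHelpers' log_formatter
```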

package/snapcrawl/lib/snapcrawl/refinements/pair_split.rb
@@ -0,0 +1,26 @@
+ module Snapcrawl
+   module PairSplit
+     refine Array do
+       def pair_split
+         false_values = %w[no false]
+         true_values = %w[yes true]
+
+         to_h do |pair|
+           key, value = pair.split '='
+
+           value = if /^\d+$/.match?(value)
+             value.to_i
+           elsif false_values.include? value
+             false
+           elsif true_values.include? value
+             true
+           else
+             value
+           end
+
+           [key, value]
+         end
+       end
+     end
+   end
+ end
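
A behavior sketch for the refinement (the key=value pairs are illustrative): digit-only strings become integers, yes/true and no/false become booleans, and anything else stays a string:

```ruby
require 'snapcrawl'

using Snapcrawl::PairSplit

['depth=3', 'log_color=no', 'cache_dir=cache'].pair_split
# => {"depth"=>3, "log_color"=>false, "cache_dir"=>"cache"}
```

This is presumably how the CLI's 'key=value' SETTINGS arguments (see the docopt template below) are coerced before reaching Config.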

package/snapcrawl/lib/snapcrawl/refinements/string_refinements.rb
@@ -0,0 +1,13 @@
+ module Snapcrawl
+   module StringRefinements
+     refine String do
+       def to_slug
+         downcase.gsub(/[^a-z0-9]+/, '-')
+       end
+
+       def protocolize
+         /^http/.match?(self) ? self : "http://#{self}"
+       end
+     end
+   end
+ end
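
A behavior sketch for the two refinements (inputs are illustrative):

```ruby
require 'snapcrawl'

using Snapcrawl::StringRefinements

'http://example.com/some page!'.to_slug  # => "http-example-com-some-page-"
'example.com'.protocolize                # => "http://example.com"
'https://example.com'.protocolize        # => "https://example.com" (unchanged)
```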

package/snapcrawl/lib/snapcrawl/screenshot.rb
@@ -0,0 +1,73 @@
+ require 'webshot'
+
+ module Snapcrawl
+   class Screenshot
+     using StringRefinements
+
+     attr_reader :url
+
+     def initialize(url)
+       @url = url
+     end
+
+     def save(outfile = nil)
+       outfile ||= "#{url.to_slug}.png"
+       webshot_capture url, outfile
+     end
+
+     private
+
+     def webshot_capture(url, image_path)
+       webshot_capture! url, image_path
+     rescue => e
+       raise ScreenshotError, "#{e.class} #{e.message}"
+     end
+
+     def webshot_capture!(url, image_path)
+       hide_output do
+         webshot.capture url, image_path, webshot_options do |magick|
+           magick.combine_options do |c|
+             c.background 'white'
+             c.gravity 'north'
+             c.quality 100
+             c.extent Config.height.positive? ? "#{Config.width}x#{Config.height}" : "#{Config.width}x"
+           end
+         end
+       end
+     end
+
+     def webshot_options
+       result = { allowed_status_codes: [404, 401, 403] }
+
+       if Config.css_selector
+         result[:selector] = Config.css_selector
+         result[:full] = false
+       end
+
+       if Config.screenshot_delay
+         result[:timeout] = Config.screenshot_delay
+       end
+
+       result
+     end
+
+     def webshot
+       @webshot ||= Webshot::Screenshot.instance
+     end
+
+     # The webshot gem messes with stdout/stderr streams so we keep it in
+     # check by using this method. Also, in some sites (e.g. uown.co) it
+     # prints some output to stdout, this is why we override $stdout for
+     # the duration of the run.
+     def hide_output
+       keep_stdout = $stdout
+       keep_stderr = $stderr
+       $stdout = StringIO.new
+       $stderr = StringIO.new
+       yield
+     ensure
+       $stdout = keep_stdout
+       $stderr = keep_stderr
+     end
+   end
+ end
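
A usage sketch for Screenshot; PhantomJS and ImageMagick must be installed (the integration approvals later in this diff show the Dependencies module verifying both), and the URL is illustrative:

```ruby
require 'snapcrawl'

shot = Snapcrawl::Screenshot.new 'http://example.com'
shot.save                 # default outfile is the URL slug: "http-example-com.png"
shot.save 'homepage.png'  # or an explicit outfile
# any webshot failure is re-raised as Snapcrawl::ScreenshotError
```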

package/snapcrawl/lib/snapcrawl/templates/config.yml
@@ -0,0 +1,49 @@
+ # All values below are the default values
+
+ # log level (0-4) 0=DEBUG 1=INFO 2=WARN 3=ERROR 4=FATAL
+ log_level: 1
+
+ # log_color (yes, no, auto)
+ # yes  = always show log color
+ # no   = never use colors
+ # auto = only use colors when running in an interactive terminal
+ log_color: auto
+
+ # number of levels to crawl, 0 means capture only the root URL
+ depth: 1
+
+ # screenshot width in pixels
+ width: 1280
+
+ # screenshot height in pixels, 0 means the entire height
+ height: 0
+
+ # number of seconds to consider the page cache and its screenshot fresh
+ cache_life: 86400
+
+ # where to store the HTML page cache
+ cache_dir: cache
+
+ # where to store screenshots
+ snaps_dir: snaps
+
+ # screenshot filename template, where '%{url}' will be replaced with a
+ # slug version of the URL (no need to include the .png extension)
+ name_template: '%{url}'
+
+ # urls not matching this regular expression will be ignored
+ url_whitelist:
+
+ # urls matching this regular expression will be ignored
+ url_blacklist:
+
+ # take a screenshot of this CSS selector only
+ css_selector:
+
+ # when true, ignore SSL related errors
+ skip_ssl_verification: false
+
+ # set to any number of seconds to wait for the page to load before taking
+ # a screenshot, leave empty to not wait at all (only needed for pages with
+ # animations or other post-load events).
+ screenshot_delay:

package/snapcrawl/lib/snapcrawl/templates/docopt.txt
@@ -0,0 +1,26 @@
+ Snapcrawl
+
+ Usage:
+   snapcrawl URL [--config FILE] [SETTINGS...]
+   snapcrawl -h | --help
+   snapcrawl -v | --version
+
+ Options:
+   -c, --config FILE
+     Path to config file, with or without the .yml extension.
+     A sample file will be created if not found.
+     The default filename is 'snapcrawl.yml'.
+
+   -h, --help
+     Show this screen
+
+   -v, --version
+     Show version number
+
+ Settings:
+   Provide any of the options available in the config as 'key=value'.
+
+ Examples:
+   snapcrawl example.com
+   snapcrawl example.com --config simple
+   snapcrawl example.com depth=1 log_level=2 width=768

package/snapcrawl/lib/snapcrawl/version.rb
@@ -0,0 +1,3 @@
+ module Snapcrawl
+   VERSION = '0.5.4'
+ end

package/snapcrawl/lib/snapcrawl.rb
@@ -0,0 +1,20 @@
+ require 'snapcrawl/version'
+ require 'snapcrawl/exceptions'
+ require 'snapcrawl/refinements/pair_split'
+ require 'snapcrawl/refinements/string_refinements'
+ require 'snapcrawl/log_helpers'
+ require 'snapcrawl/pretty_logger'
+ require 'snapcrawl/dependencies'
+ require 'snapcrawl/config'
+ require 'snapcrawl/screenshot'
+ require 'snapcrawl/page'
+ require 'snapcrawl/crawler'
+ require 'snapcrawl/cli'
+
+ if ENV['BYEBUG']
+   require 'byebug'
+   require 'lp'
+ end
+
+ Snapcrawl::Config.load
+ $logger = Snapcrawl::PrettyLogger.new
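
Since requiring the gem runs Config.load and assigns the global $logger, library consumers start with an initialized environment. A sketch, assuming Config (backed by the sting gem per the gemspec) exposes settings as class-level accessors, as the calls in page.rb and screenshot.rb suggest:

```ruby
require 'snapcrawl'

puts Snapcrawl::Config.cache_life             # => 86400 by default
puts Snapcrawl::Config.skip_ssl_verification  # => false by default
$logger.info 'environment ready'              # the logger set at require time
```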

package/snapcrawl/snapcrawl.gemspec
@@ -0,0 +1,27 @@
+ lib = File.expand_path('lib', __dir__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'snapcrawl/version'
+
+ Gem::Specification.new do |s|
+   s.name = 'snapcrawl'
+   s.version = Snapcrawl::VERSION
+   s.summary = 'Crawl a website and take screenshots (CLI + Library)'
+   s.description = 'Snapcrawl is a command line utility for crawling a website and saving screenshots.'
+   s.authors = ['Danny Ben Shitrit']
+   s.email = 'db@dannyben.com'
+   s.files = Dir['README.md', 'lib/**/*']
+   s.executables = ['snapcrawl']
+   s.homepage = 'https://github.com/DannyBen/snapcrawl'
+   s.license = 'MIT'
+   s.required_ruby_version = '>= 3.0'
+
+   s.add_runtime_dependency 'addressable', '~> 2.7'
+   s.add_runtime_dependency 'colsole', '>= 0.8.1', '< 2'
+   s.add_runtime_dependency 'docopt', '~> 0.6'
+   s.add_runtime_dependency 'httparty', '~> 0.21'
+   s.add_runtime_dependency 'lightly', '~> 0.3'
+   s.add_runtime_dependency 'nokogiri', '~> 1.10'
+   s.add_runtime_dependency 'sting', '~> 0.4'
+   s.add_runtime_dependency 'webshot', '~> 0.1'
+   s.metadata['rubygems_mfa_required'] = 'true'
+ end

package/snapcrawl/snapcrawl.yml
@@ -0,0 +1,41 @@
+ # All values below are the default values
+
+ # log level (0-4) 0=DEBUG 1=INFO 2=WARN 3=ERROR 4=FATAL
+ log_level: 1
+
+ # log_color (yes, no, auto)
+ # yes  = always show log color
+ # no   = never use colors
+ # auto = only use colors when running in an interactive terminal
+ log_color: auto
+
+ # number of levels to crawl, 0 means capture only the root URL
+ depth: 1
+
+ # screenshot width in pixels
+ width: 1280
+
+ # screenshot height in pixels, 0 means the entire height
+ height: 0
+
+ # number of seconds to consider the page cache and its screenshot fresh
+ cache_life: 86400
+
+ # where to store the HTML page cache
+ cache_dir: cache
+
+ # where to store screenshots
+ snaps_dir: snaps
+
+ # screenshot filename template, where '%{url}' will be replaced with a
+ # slug version of the URL (no need to include the .png extension)
+ name_template: '%{url}'
+
+ # urls not matching this regular expression will be ignored
+ url_whitelist:
+
+ # urls matching this regular expression will be ignored
+ url_blacklist:
+
+ # take a screenshot of this CSS selector only
+ css_selector:

package/snapcrawl/spec/README.md
@@ -0,0 +1,16 @@
+ Snapcrawl Tests
+ ==================================================
+
+ Running Tests
+ --------------------------------------------------
+
+ ```shell
+ # Start a dummy sinatra server
+ $ run server start -d
+
+ # Run all tests
+ $ run spec
+
+ # Stop the server if you are done testing
+ $ run server stop
+ ```

package/snapcrawl/spec/approvals/bin/help
@@ -0,0 +1,26 @@
+ Snapcrawl
+
+ Usage:
+   snapcrawl URL [--config FILE] [SETTINGS...]
+   snapcrawl -h | --help
+   snapcrawl -v | --version
+
+ Options:
+   -c, --config FILE
+     Path to config file, with or without the .yml extension.
+     A sample file will be created if not found.
+     The default filename is 'snapcrawl.yml'.
+
+   -h, --help
+     Show this screen
+
+   -v, --version
+     Show version number
+
+ Settings:
+   Provide any of the options available in the config as 'key=value'.
+
+ Examples:
+   snapcrawl example.com
+   snapcrawl example.com --config simple
+   snapcrawl example.com depth=1 log_level=2 width=768

package/snapcrawl/spec/approvals/bin/usage
@@ -0,0 +1,4 @@
+ Usage:
+   snapcrawl URL [--config FILE] [SETTINGS...]
+   snapcrawl -h | --help
+   snapcrawl -v | --version

package/snapcrawl/spec/approvals/cli/usage
@@ -0,0 +1,4 @@
+ Usage:
+   snapcrawl URL [--config FILE] [SETTINGS...]
+   snapcrawl -h | --help
+   snapcrawl -v | --version

package/snapcrawl/spec/approvals/config/defaults
@@ -0,0 +1,15 @@
+ ---
+ depth: 1
+ width: 1280
+ height: 0
+ cache_life: 86400
+ cache_dir: cache
+ snaps_dir: snaps
+ name_template: "%{url}"
+ url_whitelist:
+ url_blacklist:
+ css_selector:
+ log_level: 1
+ log_color: auto
+ skip_ssl_verification: false
+ screenshot_delay:

package/snapcrawl/spec/approvals/config/minimal
@@ -0,0 +1,15 @@
+ ---
+ depth: 3
+ width: 768
+ height: 0
+ cache_life: 86400
+ cache_dir: cache
+ snaps_dir: snaps
+ name_template: "%{url}"
+ url_whitelist:
+ url_blacklist:
+ css_selector:
+ log_level: 3
+ log_color: false
+ skip_ssl_verification: false
+ screenshot_delay:

package/snapcrawl/spec/approvals/integration/blacklist
@@ -0,0 +1,14 @@
+ DEBUG : initializing cli
+ DEBUG : initializing crawler with http://localhost:3000/filters
+ DEBUG : config {"depth"=>1, "width"=>1280, "height"=>0, "cache_life"=>86400, "cache_dir"=>"cache", "snaps_dir"=>"snaps", "name_template"=>"%%{url}", "url_whitelist"=>nil, "url_blacklist"=>"exclude", "css_selector"=>nil, "log_level"=>0, "log_color"=>"auto", "skip_ssl_verification"=>false, "screenshot_delay"=>nil}
+ DEBUG : processing queue: 1 remaining
+  INFO : processing http://localhost:3000/filters, depth: 0
+  INFO : screenshot for /filters already exists
+ DEBUG : ignoring http://localhost:3000/filters/exclude-me/1, reason: blacklist
+ DEBUG : ignoring http://localhost:3000/filters/exclude-me/2, reason: blacklist
+ DEBUG : processing queue: 2 remaining
+  INFO : processing http://localhost:3000/filters/include-me/1, depth: 1
+  INFO : screenshot for /filters/include-me/1 already exists
+ DEBUG : processing queue: 1 remaining
+  INFO : processing http://localhost:3000/filters/include-me/2, depth: 1
+  INFO : screenshot for /filters/include-me/2 already exists

package/snapcrawl/spec/approvals/integration/default-config
@@ -0,0 +1,14 @@
+ DEBUG : verifying phantomjs is present
+ DEBUG : verifying imagemagick is present
+ DEBUG : initializing cli
+ DEBUG : initializing crawler with http://localhost:3000
+ DEBUG : config {"depth"=>1, "width"=>1280, "height"=>0, "cache_life"=>86400, "cache_dir"=>"cache", "snaps_dir"=>"snaps", "name_template"=>"%%{url}", "url_whitelist"=>nil, "url_blacklist"=>nil, "css_selector"=>nil, "log_level"=>1, "log_color"=>"auto", "skip_ssl_verification"=>false, "screenshot_delay"=>nil}
+ DEBUG : processing queue: 1 remaining
+  INFO : processing http://localhost:3000, depth: 0
+  INFO : capturing screenshot for /
+ DEBUG : processing queue: 2 remaining
+  INFO : processing http://localhost:3000/page, depth: 1
+  INFO : capturing screenshot for /page
+ DEBUG : processing queue: 1 remaining
+  INFO : processing http://localhost:3000/errors, depth: 1
+  INFO : capturing screenshot for /errors

package/snapcrawl/spec/approvals/integration/depth-0
@@ -0,0 +1,6 @@
+ DEBUG : initializing cli
+ DEBUG : initializing crawler with http://localhost:3000
+ DEBUG : config {"depth"=>0, "width"=>1280, "height"=>0, "cache_life"=>86400, "cache_dir"=>"cache", "snaps_dir"=>"snaps", "name_template"=>"%%{url}", "url_whitelist"=>nil, "url_blacklist"=>nil, "css_selector"=>nil, "log_level"=>1, "log_color"=>"auto", "skip_ssl_verification"=>false, "screenshot_delay"=>nil}
+ DEBUG : processing queue: 1 remaining
+  INFO : processing http://localhost:3000, depth: 0
+  INFO : screenshot for / already exists

package/snapcrawl/spec/approvals/integration/depth-3
@@ -0,0 +1,6 @@
+  WARN : Addressable::URI::InvalidURIError: Invalid scheme format: '\problematic ' on /page (link: \problematic : link)
+  WARN : http error on http://localhost:3000/broken, code: 404, message: Not Found
+  WARN : http error on http://localhost:3000/secret, code: 401, message: Unauthorized
+  WARN : http error on http://localhost:3000/500, code: 500, message: Internal Server Error
+  WARN : http error on http://localhost:3000/401, code: 401, message: Unauthorized
+  WARN : http error on http://localhost:3000/403, code: 403, message: Forbidden

package/snapcrawl/spec/approvals/integration/log-color-no
@@ -0,0 +1,6 @@
+ INFO : processing http://localhost:3000, depth: 0
+ INFO : screenshot for / already exists
+ INFO : processing http://localhost:3000/page, depth: 1
+ INFO : screenshot for /page already exists
+ INFO : processing http://localhost:3000/errors, depth: 1
+ INFO : screenshot for /errors already exists

package/snapcrawl/spec/approvals/integration/screenshot-error
@@ -0,0 +1,3 @@
+  INFO : processing http://localhost:3000, depth: 0
+  INFO : capturing screenshot for /
+ ERROR : screenshot error on / - Snapcrawl::ScreenshotError: Simulated error

package/snapcrawl/spec/approvals/integration/whitelist
@@ -0,0 +1,14 @@
+ DEBUG : initializing cli
+ DEBUG : initializing crawler with http://localhost:3000/filters
+ DEBUG : config {"depth"=>1, "width"=>1280, "height"=>0, "cache_life"=>86400, "cache_dir"=>"cache", "snaps_dir"=>"snaps", "name_template"=>"%%{url}", "url_whitelist"=>"include", "url_blacklist"=>nil, "css_selector"=>nil, "log_level"=>0, "log_color"=>"auto", "skip_ssl_verification"=>false, "screenshot_delay"=>nil}
+ DEBUG : processing queue: 1 remaining
+  INFO : processing http://localhost:3000/filters, depth: 0
+  INFO : capturing screenshot for /filters
+ DEBUG : ignoring http://localhost:3000/filters/exclude-me/1, reason: whitelist
+ DEBUG : ignoring http://localhost:3000/filters/exclude-me/2, reason: whitelist
+ DEBUG : processing queue: 2 remaining
+  INFO : processing http://localhost:3000/filters/include-me/1, depth: 1
+  INFO : capturing screenshot for /filters/include-me/1
+ DEBUG : processing queue: 1 remaining
+  INFO : processing http://localhost:3000/filters/include-me/2, depth: 1
+  INFO : capturing screenshot for /filters/include-me/2

package/snapcrawl/spec/approvals/models/pretty_logger/colors
@@ -0,0 +1 @@
+  INFO : Hello World

package/snapcrawl/spec/fixtures/config/minimal.yml
@@ -0,0 +1,4 @@
+ log_level: 3
+ log_color: no
+ depth: 3
+ width: 768