powerdlz23 1.2.4 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/grell/.rspec +2 -0
  2. package/grell/.travis.yml +28 -0
  3. package/grell/CHANGELOG.md +111 -0
  4. package/grell/Gemfile +7 -0
  5. package/grell/LICENSE.txt +22 -0
  6. package/grell/README.md +213 -0
  7. package/grell/Rakefile +2 -0
  8. package/grell/grell.gemspec +36 -0
  9. package/grell/lib/grell/capybara_driver.rb +44 -0
  10. package/grell/lib/grell/crawler.rb +83 -0
  11. package/grell/lib/grell/crawler_manager.rb +84 -0
  12. package/grell/lib/grell/grell_logger.rb +10 -0
  13. package/grell/lib/grell/page.rb +275 -0
  14. package/grell/lib/grell/page_collection.rb +62 -0
  15. package/grell/lib/grell/rawpage.rb +62 -0
  16. package/grell/lib/grell/reader.rb +18 -0
  17. package/grell/lib/grell/version.rb +3 -0
  18. package/grell/lib/grell.rb +11 -0
  19. package/grell/spec/lib/capybara_driver_spec.rb +38 -0
  20. package/grell/spec/lib/crawler_manager_spec.rb +174 -0
  21. package/grell/spec/lib/crawler_spec.rb +361 -0
  22. package/grell/spec/lib/page_collection_spec.rb +159 -0
  23. package/grell/spec/lib/page_spec.rb +418 -0
  24. package/grell/spec/lib/reader_spec.rb +43 -0
  25. package/grell/spec/spec_helper.rb +66 -0
  26. package/heartmagic/config.py +1 -0
  27. package/heartmagic/heart.py +3 -0
  28. package/heartmagic/pytransform/__init__.py +483 -0
  29. package/heartmagic/pytransform/_pytransform.dll +0 -0
  30. package/heartmagic/pytransform/_pytransform.so +0 -0
  31. package/httpStatusCode/README.md +2 -0
  32. package/httpStatusCode/httpStatusCode.js +4 -0
  33. package/httpStatusCode/reasonPhrases.js +344 -0
  34. package/httpStatusCode/statusCodes.js +344 -0
  35. package/package.json +1 -1
  36. package/snapcrawl/.changelog.old.md +157 -0
  37. package/snapcrawl/.gitattributes +1 -0
  38. package/snapcrawl/.github/workflows/test.yml +41 -0
  39. package/snapcrawl/.rspec +3 -0
  40. package/snapcrawl/.rubocop.yml +23 -0
  41. package/snapcrawl/CHANGELOG.md +182 -0
  42. package/snapcrawl/Gemfile +15 -0
  43. package/snapcrawl/LICENSE +21 -0
  44. package/snapcrawl/README.md +135 -0
  45. package/snapcrawl/Runfile +35 -0
  46. package/snapcrawl/bin/snapcrawl +25 -0
  47. package/snapcrawl/lib/snapcrawl/cli.rb +52 -0
  48. package/snapcrawl/lib/snapcrawl/config.rb +60 -0
  49. package/snapcrawl/lib/snapcrawl/crawler.rb +98 -0
  50. package/snapcrawl/lib/snapcrawl/dependencies.rb +21 -0
  51. package/snapcrawl/lib/snapcrawl/exceptions.rb +5 -0
  52. package/snapcrawl/lib/snapcrawl/log_helpers.rb +36 -0
  53. package/snapcrawl/lib/snapcrawl/page.rb +118 -0
  54. package/snapcrawl/lib/snapcrawl/pretty_logger.rb +11 -0
  55. package/snapcrawl/lib/snapcrawl/refinements/pair_split.rb +26 -0
  56. package/snapcrawl/lib/snapcrawl/refinements/string_refinements.rb +13 -0
  57. package/snapcrawl/lib/snapcrawl/screenshot.rb +73 -0
  58. package/snapcrawl/lib/snapcrawl/templates/config.yml +49 -0
  59. package/snapcrawl/lib/snapcrawl/templates/docopt.txt +26 -0
  60. package/snapcrawl/lib/snapcrawl/version.rb +3 -0
  61. package/snapcrawl/lib/snapcrawl.rb +20 -0
  62. package/snapcrawl/snapcrawl.gemspec +27 -0
  63. package/snapcrawl/snapcrawl.yml +41 -0
  64. package/snapcrawl/spec/README.md +16 -0
  65. package/snapcrawl/spec/approvals/bin/help +26 -0
  66. package/snapcrawl/spec/approvals/bin/usage +4 -0
  67. package/snapcrawl/spec/approvals/cli/usage +4 -0
  68. package/snapcrawl/spec/approvals/config/defaults +15 -0
  69. package/snapcrawl/spec/approvals/config/minimal +15 -0
  70. package/snapcrawl/spec/approvals/integration/blacklist +14 -0
  71. package/snapcrawl/spec/approvals/integration/default-config +14 -0
  72. package/snapcrawl/spec/approvals/integration/depth-0 +6 -0
  73. package/snapcrawl/spec/approvals/integration/depth-3 +6 -0
  74. package/snapcrawl/spec/approvals/integration/log-color-no +6 -0
  75. package/snapcrawl/spec/approvals/integration/screenshot-error +3 -0
  76. package/snapcrawl/spec/approvals/integration/whitelist +14 -0
  77. package/snapcrawl/spec/approvals/models/pretty_logger/colors +1 -0
  78. package/snapcrawl/spec/fixtures/config/minimal.yml +4 -0
  79. package/snapcrawl/spec/server/config.ru +97 -0
  80. package/snapcrawl/spec/snapcrawl/bin_spec.rb +15 -0
  81. package/snapcrawl/spec/snapcrawl/cli_spec.rb +9 -0
  82. package/snapcrawl/spec/snapcrawl/config_spec.rb +26 -0
  83. package/snapcrawl/spec/snapcrawl/integration_spec.rb +65 -0
  84. package/snapcrawl/spec/snapcrawl/page_spec.rb +89 -0
  85. package/snapcrawl/spec/snapcrawl/pretty_logger_spec.rb +19 -0
  86. package/snapcrawl/spec/snapcrawl/refinements/pair_split_spec.rb +27 -0
  87. package/snapcrawl/spec/snapcrawl/refinements/string_refinements_spec.rb +29 -0
  88. package/snapcrawl/spec/snapcrawl/screenshot_spec.rb +62 -0
  89. package/snapcrawl/spec/spec_helper.rb +22 -0
  90. package/snapcrawl/spec/spec_mixin.rb +10 -0
@@ -0,0 +1,97 @@
require 'sinatra/base'

# Small fixture site made of fixed HTML snippets: a landing page, a page with
# assorted link shapes, error pages, a CSS-selector target and filterable URLs.
# NOTE(review): presumably served on localhost:3000 for the specs - confirm.
class Public < Sinatra::Base
  # Landing page linking to the regular page and to the error index.
  get '/' do
    [
      "<a href='/page'>Hello World</a><br>",
      "<a href='/errors'>Errors</a>",
    ].join
  end

  # Page mixing valid, broken, external, unicode, malformed, href-less and
  # hash-only anchors.
  get '/page' do
    <<~HTML
      Some Page with links:<br>
      <a href='/broken'>Broken Link</a><br>
      <a href='/ok'>Valid Link</a><br>
      <a href='https://www.example.com/'>External</a><br>
      <a href='https://www.\u0105.com/'>Unicode</a><br>
      <a href='\\problematic : link'>problematic link</a><br>
      <a name='anchor-without-href'>without href</a><br>
      <a href='#id'>with hash only href</a><br>
    HTML
  end

  # Index of links that lead to error responses or an auth dialog.
  get '/errors' do
    <<~HTML
      <a href='/secret'>Basic Auth Dialog Link</a><br>
      <a href='/500'>500 Link</a><br>
      <a href='/401'>401 Link</a><br>
      <a href='/403'>403 Link</a><br>
    HTML
  end

  get('/ok') { "<a href='/deeper/ok'>Depper Valid Link</a><br>OK" }

  get('/deeper/ok') { 'ALSO OK' }

  # A long paragraph followed by a short one carrying the `select-me` class.
  get '/selector' do
    <<~HTML
      <p>
      Outside section with a long text:
      Where could they be? Close the blast doors! Open the blast doors! Open the
      blast doors! I've been waiting for you, Obi-Wan. We meet again, at last.
      The circle is now complete. When I left you, I was but the learner,
      now I am the master. Only a master of evil, Darth. Your powers are weak,
      old man. You can't win, Darth. If you strike me down, I shall become more
      powerful than you can possibly imagine.
      </p>
      <p class='select-me'>And a small one</p>
    HTML
  end

  # Index of the include-me / exclude-me URLs served below.
  get '/filters' do
    <<~HTML
      <a href='/filters/include-me/1'>Included</a><br>
      <a href='/filters/include-me/2'>Included</a><br>
      <a href='/filters/exclude-me/1'>Excluded</a><br>
      <a href='/filters/exclude-me/2'>Excluded</a><br>
    HTML
  end

  get('/filters/include-me/:id') { "include-me #{params[:id]}" }

  get('/filters/exclude-me/:id') { "exclude-me #{params[:id]}" }

  # Raises so that the framework answers with a server error.
  get('/500') { raise 'server error' }

  get '/401' do
    status 401
    '401 Unauthorized'
  end

  get '/403' do
    status 403
    '403 Forbidden'
  end
end

# Single-route app placed behind HTTP Basic authentication (user / pass).
class Protected < Sinatra::Base
  use Rack::Auth::Basic, 'Protected Area' do |login, secret|
    [login, secret] == %w[user pass]
  end

  get '/' do
    'secret'
  end
end

# Mount the open site at the root and the authenticated app under /secret.
run Rack::URLMap.new('/' => Public, '/secret' => Protected)
@@ -0,0 +1,15 @@
require 'spec_helper'

# Runs the executable in a subshell and checks what it prints.
describe 'bin/snapcrawl' do
  let(:executable) { 'bin/snapcrawl' }

  it 'shows usage patterns' do
    output = `#{executable}`
    expect(output).to match_approval('bin/usage')
  end

  it 'shows help' do
    output = `#{executable} --help`
    expect(output).to match_approval('bin/help')
  end

  it 'shows correct version' do
    output = `#{executable} -v`
    expect(output).to match(VERSION)
  end
end
@@ -0,0 +1,9 @@
require 'spec_helper'

# Calling the CLI without arguments should print the approved usage text.
describe CLI do
  subject(:cli) { described_class.new }

  it 'shows usage' do
    expect { cli.call }.to output_approval('cli/usage')
  end
end
@@ -0,0 +1,26 @@
require 'spec_helper'
require 'fileutils'

# Exercises Config.load: built-in defaults, merging a user file over the
# defaults, and creating the file from the bundled template when it is absent.
describe Config do
  subject { described_class }

  describe '#load' do
    it 'has defaults' do
      subject.load
      expect(subject.settings.to_yaml).to match_approval('config/defaults').diff(4)
    end

    it 'loads file if it exists and merges it with the defaults' do
      subject.load 'spec/fixtures/config/minimal'
      expect(subject.settings.to_yaml).to match_approval('config/minimal').diff(4)
    end

    context 'when the config file is not found' do
      let(:config_path) { 'tmp/config.yml' }

      # Remove any leftover from a previous run without shelling out to `rm`,
      # so the cleanup also works where that command is unavailable.
      before { FileUtils.rm_f(config_path) }

      it 'creates it' do
        subject.load config_path
        expect(File.read(config_path)).to eq File.read('lib/snapcrawl/templates/config.yml')
      end
    end
  end
end
@@ -0,0 +1,65 @@
require 'spec_helper'

# End-to-end runs of the CLI against http://localhost:3000; each example
# compares the captured log with an approval fixture.
describe 'integration' do
  subject(:cli) { CLI.new }

  let(:url) { 'http://localhost:3000' }
  let(:captured_io) { fresh_logger }
  let(:log) { captured_io.string }

  before do
    captured_io # install the fresh logger before the config is reloaded
    Config.load # reload defaults
  end

  it 'executes successfully' do
    cli.call([url])
    expect(log).to match_approval('integration/default-config')
  end

  context 'with depth=0' do
    it 'captures the first page only' do
      cli.call([url, 'depth=0'])
      expect(log).to match_approval('integration/depth-0')
    end
  end

  context 'with depth=3 log_level=2' do
    it 'captures 4 levels and shows warnings and above' do
      cli.call([url, 'depth=3', 'log_level=2'])
      expect(log).to match_approval('integration/depth-3')
    end
  end

  context 'with log_color=no' do
    it 'outputs without colors' do
      cli.call([url, 'log_color=no', 'log_level=1'])
      expect(log).to match_approval('integration/log-color-no')
    end
  end

  context 'with url_whitelist' do
    let(:url) { 'http://localhost:3000/filters' }

    it 'only processes urls that match the regex' do
      cli.call([url, 'url_whitelist=include', 'log_level=0'])
      expect(log).to match_approval('integration/whitelist')
    end
  end

  context 'with url_blacklist' do
    let(:url) { 'http://localhost:3000/filters' }

    it 'ignores urls that match the regex' do
      cli.call([url, 'url_blacklist=exclude', 'log_level=0'])
      expect(log).to match_approval('integration/blacklist')
    end
  end

  context 'when screenshot errors' do
    it 'logs the error and continues' do
      expect_any_instance_of(Screenshot)
        .to receive(:save)
        .and_raise(ScreenshotError, 'Simulated error')

      cli.call([url, 'cache_life=0', 'depth=0', 'log_level=1'])
      expect(log).to match_approval('integration/screenshot-error')
    end
  end
end
@@ -0,0 +1,89 @@
require 'spec_helper'

# Covers Page validity checks, URL decomposition, link extraction and the
# delegation of screenshots to Screenshot.
describe Page do
  subject { described_class.new url }

  let(:url) { 'http://localhost:3000/page' }

  describe '#valid?' do
    context 'when the page is valid' do
      it 'returns true' do
        expect(subject).to be_valid
      end
    end

    context 'when the page is not found' do
      let(:url) { 'http://localhost:3000/not-found' }

      it 'returns false and logs a warning' do
        expect($logger).to receive(:warn).with(/code.*404/)
        expect(subject).not_to be_valid
      end
    end

    context "when the url can't be reached" do
      let(:url) { 'http://localhost:1111/' }

      it 'returns false and logs an error' do
        expect($logger).to receive(:error).with(/connection refused/i)
        expect(subject).not_to be_valid
      end
    end

    context 'when the url has a bad SSL certificate' do
      let(:url) { 'https://untrusted-root.badssl.com/' }

      it 'returns false and logs an error' do
        expect($logger).to receive(:error).with(/certificate verify failed/i)
        expect(subject).not_to be_valid
      end

      context 'when skip_ssl_verification is true' do
        before { Config.skip_ssl_verification = true }
        after { Config.skip_ssl_verification = false }

        it 'returns true' do
          expect(subject).to be_valid
        end
      end
    end
  end

  describe '#site' do
    it 'returns the site name' do
      expect(subject.site).to eq 'http://localhost:3000'
    end
  end

  describe '#path' do
    it 'returns the path' do
      expect(subject.path).to eq '/page'
    end
  end

  describe '#links' do
    it 'returns an array of links on the page and logs warnings' do
      expect($logger).to receive(:warn).with(/problematic/)
      expect(subject.links).to eq ['http://localhost:3000/broken', 'http://localhost:3000/ok']
    end
  end

  describe '#pages' do
    it 'returns an array of Page objects from the links' do
      expect($logger).to receive(:warn).with(/problematic/)
      pages = subject.pages
      expect(pages.count).to eq 2
      expect(pages.first).to be_a described_class
    end
  end

  describe '#save_screenshot' do
    # A real Screenshot whose #save is stubbed below. Deliberately not named
    # `double`: a `let` with that name would hide the rspec-mocks `double`
    # helper for every example in this group.
    let(:screenshot) { Screenshot.new subject.url }

    it 'delegates to Screenshot' do
      allow(Screenshot).to receive(:new).with(subject.url).and_return(screenshot)
      expect(screenshot).to receive(:save).with('outfile')
      subject.save_screenshot 'outfile'
    end
  end
end
@@ -0,0 +1,19 @@
require 'spec_helper'

# Checks what PrettyLogger builds and how a colour-marked message is printed.
describe PrettyLogger do
  before do
    Config.load
  end

  describe '::new' do
    it 'returns a Logger instance' do
      is_expected.to be_a(Logger)
    end
  end

  describe 'log formatting' do
    it 'logs with colors' do
      # The g`...` wrapper is the markup under test; keep it verbatim.
      text = 'g`Hello World`'
      expect { subject.info(text) }.to output_approval('models/pretty_logger/colors')
    end
  end
end
@@ -0,0 +1,27 @@
require 'spec_helper'

# Exercises the pair_split refinement on arrays of "key=value" strings,
# including conversion of integer-like and boolean-like values.
describe PairSplit do
  using described_class

  subject { ['key=value', 'key2=value2'] }

  it 'splits an array of key=value elements into a hash' do
    expect(subject.pair_split).to eq({ 'key' => 'value', 'key2' => 'value2' })
  end

  context 'when the value is integer-like' do
    subject { ['cakes=3'] }

    it 'converts it to integer' do
      expect(subject.pair_split['cakes']).to eq 3
    end
  end

  context 'when the value is boolean-like' do
    subject { ['pizza=yes', 'burger=true', 'broccoli=no', 'eggplant=false'] }

    it 'converts it to boolean' do
      expect(subject.pair_split).to eq({ 'pizza' => true, 'burger' => true, 'broccoli' => false, 'eggplant' => false })
    end
  end
end
@@ -0,0 +1,29 @@
require 'spec_helper'

# Exercises the String refinements: slug generation and scheme prefixing.
describe StringRefinements do
  using described_class

  describe '#to_slug' do
    it 'converts to hyphen-delimited string' do
      slug = 'http://comain.com/index.html'.to_slug
      expect(slug).to eq('http-comain-com-index-html')
    end
  end

  describe '#protocolize' do
    context 'when the string does not start with http' do
      subject(:bare_host) { 'example.com' }

      it 'adds http://' do
        expect(bare_host.protocolize).to eq("http://#{bare_host}")
      end
    end

    context 'when the string starts with http' do
      subject(:full_url) { 'https://example.com' }

      it 'does nothing' do
        expect(full_url.protocolize).to eq(full_url)
      end
    end
  end
end
@@ -0,0 +1,62 @@
require 'spec_helper'
require 'fileutils'

# Covers Screenshot#save: plain capture, capture limited by a CSS selector,
# the delay option handed to Webshot, and the error path.
describe Screenshot do
  subject { described_class.new url }

  let(:url) { 'http://localhost:3000/page' }

  describe '#save' do
    let(:outfile) { 'tmp/screenshot.png' }

    before do
      # Start from an empty tmp/ without shelling out to `rm`, so the cleanup
      # does not depend on a POSIX shell being present.
      FileUtils.rm_f(Dir['tmp/*.png'])
      expect(Dir['tmp/*.png'].count).to eq 0
    end

    it 'saves a screenshot' do
      subject.save outfile
      expect(File).to exist(outfile)
      expect(File.size(outfile)).to be > 22_000
    end

    context 'when Config.css_selector is set' do
      let(:url) { 'http://localhost:3000/selector' }

      # Capture the whole page first so the example has a baseline size.
      before do
        Config.css_selector = nil
        subject.save 'tmp/full-page.png'
      end

      after do
        Config.css_selector = nil
      end

      it 'only captures the selected area' do
        Config.css_selector = '.select-me'
        subject.save 'tmp/selected-area.png'
        full_size = File.size('tmp/full-page.png')
        selected_size = File.size('tmp/selected-area.png')

        expect(selected_size).to be < full_size
      end
    end

    context 'when Config.screenshot_delay is set' do
      before { Config.screenshot_delay = 3 }
      after { Config.screenshot_delay = nil }

      it 'sends the timeout argument to webshot' do
        expect(Webshot::Screenshot.instance).to receive(:capture).with(String, String, hash_including({ timeout: 3 }))
        subject.save 'tmp/delay.png'
      end
    end

    context 'when there is an error' do
      let(:url) { 'http://localhost:1111' }

      it 'raises ScreenshotError' do
        expect { subject.save outfile }.to raise_error(ScreenshotError)
      end
    end
  end
end
@@ -0,0 +1,22 @@
# Coverage must be started before any application code is loaded.
require 'simplecov'

SimpleCov.start

require 'rubygems'
require 'bundler'
Bundler.require :default, :development

require 'fileutils'
require 'stringio'

# Expose the Snapcrawl constants (CLI, Config, Page, ...) unqualified to the specs.
include Snapcrawl

# Consistent rspec fixtures output
ENV['TTY'] = 'on'

# Reset the working directories with FileUtils instead of shelling out, so the
# setup does not rely on `rm` / `mkdir` being available.
FileUtils.rm_rf 'snaps'
FileUtils.mkdir_p 'tmp'

require_relative 'spec_mixin'
RSpec.configure do |config|
  config.include SpecMixin
  config.example_status_persistence_file_path = 'spec/status.txt'
end
@@ -0,0 +1,10 @@
# Helpers mixed into every example group.
module SpecMixin
  include Snapcrawl::LogHelpers

  # Replaces the global $logger with one that writes to an in-memory buffer,
  # formatted with log_formatter, and returns that buffer so a spec can read
  # what was logged.
  def fresh_logger
    StringIO.new.tap do |buffer|
      $logger = Logger.new(buffer)
      $logger.formatter = log_formatter
    end
  end
end