npm - powerdlz23 - Versions diffs - 1.2.4 → 1.2.5 - Mend

powerdlz23 1.2.4 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

package/grell/.rspec +2 -0
package/grell/.travis.yml +28 -0
package/grell/CHANGELOG.md +111 -0
package/grell/Gemfile +7 -0
package/grell/LICENSE.txt +22 -0
package/grell/README.md +213 -0
package/grell/Rakefile +2 -0
package/grell/grell.gemspec +36 -0
package/grell/lib/grell/capybara_driver.rb +44 -0
package/grell/lib/grell/crawler.rb +83 -0
package/grell/lib/grell/crawler_manager.rb +84 -0
package/grell/lib/grell/grell_logger.rb +10 -0
package/grell/lib/grell/page.rb +275 -0
package/grell/lib/grell/page_collection.rb +62 -0
package/grell/lib/grell/rawpage.rb +62 -0
package/grell/lib/grell/reader.rb +18 -0
package/grell/lib/grell/version.rb +3 -0
package/grell/lib/grell.rb +11 -0
package/grell/spec/lib/capybara_driver_spec.rb +38 -0
package/grell/spec/lib/crawler_manager_spec.rb +174 -0
package/grell/spec/lib/crawler_spec.rb +361 -0
package/grell/spec/lib/page_collection_spec.rb +159 -0
package/grell/spec/lib/page_spec.rb +418 -0
package/grell/spec/lib/reader_spec.rb +43 -0
package/grell/spec/spec_helper.rb +66 -0
package/heartmagic/config.py +1 -0
package/heartmagic/heart.py +3 -0
package/heartmagic/pytransform/__init__.py +483 -0
package/heartmagic/pytransform/_pytransform.dll +0 -0
package/heartmagic/pytransform/_pytransform.so +0 -0
package/httpStatusCode/README.md +2 -0
package/httpStatusCode/httpStatusCode.js +4 -0
package/httpStatusCode/reasonPhrases.js +344 -0
package/httpStatusCode/statusCodes.js +344 -0
package/package.json +1 -1
package/snapcrawl/.changelog.old.md +157 -0
package/snapcrawl/.gitattributes +1 -0
package/snapcrawl/.github/workflows/test.yml +41 -0
package/snapcrawl/.rspec +3 -0
package/snapcrawl/.rubocop.yml +23 -0
package/snapcrawl/CHANGELOG.md +182 -0
package/snapcrawl/Gemfile +15 -0
package/snapcrawl/LICENSE +21 -0
package/snapcrawl/README.md +135 -0
package/snapcrawl/Runfile +35 -0
package/snapcrawl/bin/snapcrawl +25 -0
package/snapcrawl/lib/snapcrawl/cli.rb +52 -0
package/snapcrawl/lib/snapcrawl/config.rb +60 -0
package/snapcrawl/lib/snapcrawl/crawler.rb +98 -0
package/snapcrawl/lib/snapcrawl/dependencies.rb +21 -0
package/snapcrawl/lib/snapcrawl/exceptions.rb +5 -0
package/snapcrawl/lib/snapcrawl/log_helpers.rb +36 -0
package/snapcrawl/lib/snapcrawl/page.rb +118 -0
package/snapcrawl/lib/snapcrawl/pretty_logger.rb +11 -0
package/snapcrawl/lib/snapcrawl/refinements/pair_split.rb +26 -0
package/snapcrawl/lib/snapcrawl/refinements/string_refinements.rb +13 -0
package/snapcrawl/lib/snapcrawl/screenshot.rb +73 -0
package/snapcrawl/lib/snapcrawl/templates/config.yml +49 -0
package/snapcrawl/lib/snapcrawl/templates/docopt.txt +26 -0
package/snapcrawl/lib/snapcrawl/version.rb +3 -0
package/snapcrawl/lib/snapcrawl.rb +20 -0
package/snapcrawl/snapcrawl.gemspec +27 -0
package/snapcrawl/snapcrawl.yml +41 -0
package/snapcrawl/spec/README.md +16 -0
package/snapcrawl/spec/approvals/bin/help +26 -0
package/snapcrawl/spec/approvals/bin/usage +4 -0
package/snapcrawl/spec/approvals/cli/usage +4 -0
package/snapcrawl/spec/approvals/config/defaults +15 -0
package/snapcrawl/spec/approvals/config/minimal +15 -0
package/snapcrawl/spec/approvals/integration/blacklist +14 -0
package/snapcrawl/spec/approvals/integration/default-config +14 -0
package/snapcrawl/spec/approvals/integration/depth-0 +6 -0
package/snapcrawl/spec/approvals/integration/depth-3 +6 -0
package/snapcrawl/spec/approvals/integration/log-color-no +6 -0
package/snapcrawl/spec/approvals/integration/screenshot-error +3 -0
package/snapcrawl/spec/approvals/integration/whitelist +14 -0
package/snapcrawl/spec/approvals/models/pretty_logger/colors +1 -0
package/snapcrawl/spec/fixtures/config/minimal.yml +4 -0
package/snapcrawl/spec/server/config.ru +97 -0
package/snapcrawl/spec/snapcrawl/bin_spec.rb +15 -0
package/snapcrawl/spec/snapcrawl/cli_spec.rb +9 -0
package/snapcrawl/spec/snapcrawl/config_spec.rb +26 -0
package/snapcrawl/spec/snapcrawl/integration_spec.rb +65 -0
package/snapcrawl/spec/snapcrawl/page_spec.rb +89 -0
package/snapcrawl/spec/snapcrawl/pretty_logger_spec.rb +19 -0
package/snapcrawl/spec/snapcrawl/refinements/pair_split_spec.rb +27 -0
package/snapcrawl/spec/snapcrawl/refinements/string_refinements_spec.rb +29 -0
package/snapcrawl/spec/snapcrawl/screenshot_spec.rb +62 -0
package/snapcrawl/spec/spec_helper.rb +22 -0
package/snapcrawl/spec/spec_mixin.rb +10 -0

package/grell/spec/lib/page_spec.rb ADDED Viewed

@@ -0,0 +1,418 @@
+RSpec.describe Grell::Page do # Specs for Grell::Page: accessors, retries, navigation, error pages and link extraction
+  let(:page_id) { rand(10).floor + 10 } # random id in 10..19, so it never collides with parent_page_id (0..9)
+  let(:parent_page_id) { rand(10).floor }
+  let(:page) { Grell::Page.new(url, page_id, parent_page_id) }
+  let(:host) { 'http://www.example.com' }
+  let(:url) { 'http://www.example.com/test' }
+  let(:returned_headers)  { { 'Other-Header' => 'yes', 'Content-Type' => 'text/html' }} # headers used in the stubbed responses below
+  let(:now) { Time.now }
+  before do
+    allow(Time).to receive(:now).and_return(now) # Time.now always returns the `now` let, so timestamps can be compared exactly
+    Grell.logger = Logger.new(nil) # avoids noise in rspec output
+  end
+  it 'gives access to the url' do
+    expect(page.url).to eq(url)
+  end
+  it 'gives access to the path' do
+    expect(page.path).to eq('/test')
+  end
+  it 'gives access to the page id' do
+    expect(page.id).to eq(page_id)
+  end
+  it 'gives access to the parent page id' do
+    expect(page.parent_id).to eq(parent_page_id)
+  end
+  it 'newly created page does not have status yet' do
+    expect(page.status).to eq(nil)
+  end
+  shared_examples_for 'a grell page' do # including context must define: status, body, expected_headers, links, visited
+    it 'returns the correct status' do
+      expect(page.status).to eq(status)
+    end
+    it 'has the correct body' do
+      expect(page.body).to eq(body)
+    end
+    it 'has correct headers' do
+      expect(page.headers).to include(expected_headers)
+    end
+    it 'has the correct links' do
+      expect(page.links.sort).to eq(links.sort) # sorted on both sides, so link order is not asserted
+    end
+    it '#visited? returns the correct value' do
+      expect(page.visited?).to eq(visited)
+    end
+    it 'has correct timestamp' do
+      expect(page.timestamp).to eq(now)
+    end
+  end
+  describe '#retries' do
+    context 'page has not been navigated' do
+      it '#retries return 0' do
+        expect(page.retries).to eq(0)
+      end
+    end
+    context 'page has been navigated once' do
+      before do
+        proxy.stub(url).and_return(body: '', code: 200, headers: {}) # NOTE(review): `proxy` is not defined in this file; presumably supplied by the billy/capybara/rspec integration - confirm
+        page.navigate
+      end
+      it '#retries return 0' do
+        expect(page.retries).to eq(0)
+      end
+    end
+    context 'page has been navigated twice' do
+      before do
+        proxy.stub(url).and_return(body: '', code: 200, headers: {})
+        page.navigate
+        page.navigate
+      end
+      it '#retries return 1' do
+        expect(page.retries).to eq(1) # the second navigation is counted as one retry
+      end
+    end
+  end
+  describe '#navigate' do
+    before do
+      proxy.stub(url).and_return(body: '', code: 200, headers: {})
+    end
+    it 'waits for all ajax requests' do
+      expect_any_instance_of(Grell::RawPage).to receive(:wait_for_all_ajax_requests).with(0, 0.5) # NOTE(review): meaning of the (0, 0.5) arguments is not visible here - confirm against Grell::Page
+      page.navigate
+    end
+  end
+  shared_examples_for 'an errored grell page' do # including context must define: headers
+    it 'returns empty status 404 page after navigating' do
+      expect(page.status).to eq(404)
+      expect(page.links).to eq([])
+      expect(page.headers).to eq(headers)
+      expect(page.body).to eq('')
+      expect(page.has_selector?('html')).to eq(false)
+      expect(page).to be_visited
+      expect(page.timestamp).to eq(now)
+      expect(page.error?).to eq(true)
+      expect(page.instance_variable_get(:@times_visited)).to eq(1) # reads the instance variable directly rather than through a public reader
+    end
+  end
+  [ Capybara::Poltergeist::JavascriptError, Capybara::Poltergeist::BrowserError, URI::InvalidURIError,
+    Capybara::Poltergeist::TimeoutError, Capybara::Poltergeist::StatusFailError,
+    Capybara::Poltergeist::DeadClient, Errno::ECONNRESET ].each do |error_type| # one generated context per error class; all must yield the same errored page
+    context "#{error_type}" do
+      let(:headers) do
+        {
+          grellStatus: 'Error',
+          errorClass: "#{error_type}",
+          errorMessage: error_message
+        }
+      end
+      let(:error_message) { 'Trusmis broke it again' }
+      let(:now) { Time.now } # same definition as the top-level `now`
+      before do
+        allow_any_instance_of(Grell::RawPage).to receive(:navigate).and_raise(error_type, 'error')
+        allow_any_instance_of(error_type).to receive(:message).and_return(error_message) # message is stubbed so every error class reports the same text
+        page.navigate
+      end
+      it_behaves_like 'an errored grell page'
+    end
+  end
+  context 'we have not yet navigated to the page' do
+    let(:visited) { false }
+    let(:status) { nil }
+    let(:body) { '' }
+    let(:links) { [] }
+    let(:expected_headers) { {} }
+    let(:now) { nil } # makes the top-level before stub Time.now to nil, matching the nil timestamp expected by the shared example
+    before do
+      proxy.stub(url).and_return(body: body, code: status, headers: returned_headers.dup) # stub only; page.navigate is intentionally not called in this context
+    end
+    it_behaves_like 'a grell page'
+  end
+  context 'navigating to the URL we get a 404' do
+    let(:visited) { true }
+    let(:status) { 404 }
+    let(:body) { '<html><head></head><body>nothing cool</body></html>' }
+    let(:links) { [] }
+    let(:expected_headers) { returned_headers }
+    before do
+      proxy.stub(url).and_return(body: body, code: status, headers: returned_headers.dup)
+      page.navigate
+    end
+    it_behaves_like 'a grell page'
+  end
+  context 'navigating to an URL with redirects, follows them transparently' do
+    let(:visited) { true }
+    let(:status) { 200 }
+    let(:body) { '<html><head></head><body>nothing cool</body></html>' }
+    let(:links) { [] }
+    let(:expected_headers) { returned_headers }
+    let(:real_url) { 'http://example.com/other' }
+    before do
+      proxy.stub(url).and_return(:redirect_to => real_url) # first hop redirects to real_url
+      proxy.stub(real_url).and_return(body: body, code: status, headers: returned_headers.dup)
+      page.navigate
+    end
+    it_behaves_like 'a grell page'
+    it 'followed_redirects? is true' do
+      expect(page.followed_redirects?).to eq(true)
+    end
+    it 'current_url match the url we were redirected to' do
+      expect(page.current_url).to eq(real_url)
+    end
+  end
+  # Add here any other examples that apply to almost all pages (no errors, no redirects)
+  context 'navigating to the URL we get page with no links' do
+    let(:visited) { true }
+    let(:status) { 200 }
+    let(:body) { '<html><head></head><body>nothing cool</body></html>' }
+    let(:links) { [] }
+    let(:expected_headers) { returned_headers }
+    before do
+      proxy.stub(url).and_return(body: body, code: status, headers: returned_headers.dup)
+      page.navigate
+    end
+    it_behaves_like 'a grell page'
+    it 'followed_redirects is false' do
+      expect(page.followed_redirects?).to eq(false)
+    end
+    it 'current_url is url' do
+      expect(page.current_url).to eq(url)
+    end
+    it 'does not have errors' do
+      expect(page.error?).to eq(false)
+    end
+  end
+  context 'navigating to the URL we get page with links using a elements' do
+    let(:visited) { true }
+    let(:status) { 200 }
+    let(:body) do
+      "<html><head></head><body>
+      Hello world!
+      <a href=\"/trusmis.html\">trusmis</a>
+      <a href=\"/help.html\">help</a>
+      <a href=\"http://www.outsidewebsite.com/help.html\">help</a>
+      </body></html>"
+    end
+    let(:links) { ['http://www.example.com/trusmis.html', 'http://www.example.com/help.html'] } # relative hrefs are expected back as absolute URLs on the page host
+    let(:expected_headers) { returned_headers }
+    before do
+      proxy.stub(url).and_return(body: body, code: status, headers: returned_headers.dup)
+      page.navigate
+    end
+    it_behaves_like 'a grell page'
+    it 'do not return links to external websites' do
+      expect(page.links).to_not include('http://www.outsidewebsite.com/help.html')
+    end
+  end
+  context 'navigating to the URL we get page with disabled links' do
+    let(:visited) { true }
+    let(:status) { 200 }
+    let(:body) do
+      "<html><head></head><body>
+      Hello world!
+      <a href=\"/trusmis.html\">trusmis</a>
+      <a href=\"/help.html\">help</a>
+      <a href=\"javascript: void(0)\">help</a>
+      <a href=\"/helpdisabled.html\" disabled=\"\">helpdisabled</a>
+      </body></html>"
+    end
+    let(:links) { ['http://www.example.com/trusmis.html', 'http://www.example.com/help.html'] } # the javascript: href and the disabled anchor are expected to be left out
+    let(:expected_headers) { returned_headers }
+    before do
+      proxy.stub(url).and_return(body: body, code: status, headers: returned_headers.dup)
+      page.navigate
+    end
+    it_behaves_like 'a grell page'
+  end
+  context 'navigating to the URL we get page with links with absolute links' do
+    let(:visited) { true }
+    let(:status) { 200 }
+    let(:body) do
+      "<html><head></head><body>
+      Hello world!
+      <a href=\"/trusmis.html\">trusmis</a>
+      <a href=\"http://www.example.com/help.html\">help</a>
+      <a href=\"http://www.outsidewebsite.com/help.html\">help</a>
+      </body></html>"
+    end
+    let(:links) { ['http://www.example.com/trusmis.html', 'http://www.example.com/help.html'] }
+    let(:expected_headers) { returned_headers }
+    before do
+      proxy.stub(url).and_return(body: body, code: status, headers: returned_headers.dup)
+      page.navigate
+    end
+    it_behaves_like 'a grell page'
+    it 'do not return links to external websites' do
+      expect(page.links).to_not include('http://www.outsidewebsite.com/help.html')
+    end
+  end
+  context 'navigating to the URL we get page with links using a mix of elements' do
+    let(:visited) { true }
+    let(:status) { 200 }
+    let(:body) do
+      "<html><head></head><body>
+      Hello world!
+      <a href=\"/trusmis.html\">trusmis</a>
+      <table>
+      <tbody>
+      <tr href=\"/help_me.html\"><td>help</td></tr>
+      <tr data-href=\"/help.html\"><td>help</td></tr>
+      </tbody>
+      </table>
+      <div data-href=\"http://www.example.com/more_help.html\">help</div>
+      <div data-href=\"http://www.outsidewebsite.com/help.html\">help</div>
+      </body></html>"
+    end
+    let(:links) do # href and data-href on non-anchor elements are expected to count as links too
+      [ 'http://www.example.com/trusmis.html', 'http://www.example.com/help.html',
+        'http://www.example.com/more_help.html', 'http://www.example.com/help_me.html' ]
+    end
+    let(:expected_headers) { returned_headers }
+    before do
+      proxy.stub(url).and_return(body: body, code: status, headers: returned_headers.dup)
+      page.navigate
+    end
+    it_behaves_like 'a grell page'
+    describe '#path' do # NOTE(review): nested in this context, so the before block above (stub + navigate) also runs for these examples
+      context 'proper url' do
+        let(:url) { 'http://www.anyurl.com/path' }
+        let(:page) { Grell::Page.new(url, page_id, parent_page_id) }
+        it 'returns the path' do
+          expect(page.path).to eq('/path')
+        end
+      end
+      context 'broken url' do
+        let(:url) { 'www.an.asda.fasfasf.yurl.com/path' }
+        let(:page) { Grell::Page.new(url, page_id, parent_page_id) }
+        it 'returns the path' do
+          expect(page.path).to eq(url) # for a URL without a scheme the whole string is expected back as the path
+        end
+      end
+    end
+    it 'do not return links to external websites' do
+      expect(page.links).to_not include('http://www.outsidewebsite.com/help.html')
+    end
+  end
+ context 'navigating to the URL we get page with links inside the header section of the code' do
+    let(:visited) { true }
+    let(:status) { 200 }
+    let(:css) { '/application.css' }
+    let(:favicon) { '/favicon.ico' }
+    let(:body) do
+      "<html><head>
+      <title>mimi</title>
+      <link href=\"#{css}\" rel=\"stylesheet\">
+      <link href=\"#{favicon}\" rel=\"shortcut icon\" type=\"image/vnd.microsoft.icon\">
+      </head>
+      <body>
+      Hello world!
+      <a href=\"/trusmis.html\">trusmis</a>
+      </body></html>"
+    end
+    let(:links) do
+      ['http://www.example.com/trusmis.html']
+    end
+    let(:expected_headers) { returned_headers }
+    before do
+      proxy.stub(url).and_return(body: body, code: status, headers: returned_headers.dup)
+      # We need to stub these or PhantomJS will get stuck trying to retrieve the resources
+      proxy.stub(host + css).and_return(body: '', code: status)
+      proxy.stub(host + favicon).and_return(body: '', code: status)
+      page.navigate
+    end
+    it_behaves_like 'a grell page'
+    it 'do not return links to resources in the header' do
+      expect(page.links).to_not include('http://www.example.com/application.css')
+    end
+  end
+  context 'status is never set' do # this may happen when nothing comes back from the site
+    before do
+      stub_const('Grell::Page::WAIT_TIME', 0)
+      allow_any_instance_of(Grell::RawPage).to receive(:status).and_return(nil) # RawPage is stubbed to report no status, no headers and an empty body
+      allow_any_instance_of(Grell::RawPage).to receive(:headers).and_return({})
+      allow_any_instance_of(Grell::RawPage).to receive(:body).and_return('')
+      proxy.stub(url).and_return(body: body, code: nil, headers: {})
+      page.navigate
+    end
+    let(:visited) { true }
+    let(:status) { nil }
+    let(:body) { '' }
+    let(:links) { [] }
+    let(:expected_headers) { {} }
+    it_behaves_like 'a grell page'
+  end
+end

package/grell/spec/lib/reader_spec.rb ADDED Viewed

@@ -0,0 +1,43 @@
+RSpec.describe Grell::Reader do # Timing specs for Grell::Reader.wait_for
+  context 'Waiting time expired' do
+    let(:waiting_time) {0}
+    let(:sleeping_time) {2}
+    let(:condition) {false}
+    it 'does not sleep' do
+      before_time = Time.now
+      Grell::Reader.wait_for(->{''}, waiting_time, sleeping_time) do # NOTE(review): the role of the first argument (a lambda returning '') is not visible here - confirm against Grell::Reader
+        condition
+      end
+      expect(Time.now - before_time).to be < 1 # a single sleep of sleeping_time (2s) would already exceed this bound
+    end
+  end
+  context 'The condition is true' do
+    let(:waiting_time) {3}
+    let(:sleeping_time) {2}
+    let(:condition) {true}
+    it 'does not sleep' do
+      before_time = Time.now
+      Grell::Reader.wait_for(->{''}, waiting_time, sleeping_time) do
+        condition
+      end
+      expect(Time.now - before_time).to be < 1 # the block is truthy on the first check, so no sleep is expected
+    end
+  end
+  context 'The condition is false' do
+    let(:waiting_time) {0.2}
+    let(:sleeping_time) {0.2}
+    let(:condition) {false}
+    it 'waits the waiting time' do
+      before_time = Time.now
+      Grell::Reader.wait_for(->{''}, waiting_time, sleeping_time) do
+        condition
+      end
+      expect(Time.now - before_time).to be > waiting_time # elapsed wall-clock time must exceed the 0.2s waiting time
+    end
+  end
+end

package/grell/spec/spec_helper.rb ADDED Viewed

@@ -0,0 +1,66 @@
+require 'grell'
+require 'byebug'
+require 'timecop'
+require 'webmock/rspec'
+require 'billy/capybara/rspec'
+require 'rack'
+require 'rack/server'
+# This tricks Puffing Billy into using this logger instead of its own.
+# Puffing Billy is very noisy and we do not want to see that in our output.
+class Rails
+  def self.logger
+    Logger.new(nil) # Logger with a nil log device; a new instance is built on every call
+  end
+end
+WebMock.disable_net_connect! # WebMock: disallow real outgoing HTTP connections while the suite runs
+# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
+RSpec.configure do |config|
+  # We do not need to wait for pages to return all the data
+  config.before do
+    stub_const("Grell::Page::WAIT_TIME", 0)
+    allow_any_instance_of(Capybara::Session).to receive(:evaluate_script).and_return(nil) # every evaluate_script call returns nil in specs
+  end
+  config.expect_with :rspec do |expectations|
+    expectations.include_chain_clauses_in_custom_matcher_descriptions = true
+  end
+  config.mock_with :rspec do |mocks|
+    # Prevents you from mocking or stubbing a method that does not exist on
+    # a real object.
+    mocks.verify_partial_doubles = true
+  end
+  # Limits the available syntax to the non-monkey patched syntax that is recommended.
+  config.disable_monkey_patching!
+  # This setting enables warnings. It's recommended, but in some cases may
+  # be too noisy due to issues in dependencies.
+  # TODO: Puffing Billy emits lots of warnings; test this again with newer versions
+  # config.warnings = true
+  # Many RSpec users commonly either run the entire suite or an individual
+  # file, and it's useful to allow more verbose output when running an
+  # individual spec file.
+  if config.files_to_run.one?
+    # Use the documentation formatter for detailed output,
+    # unless a formatter has already been configured
+    # (e.g. via a command-line flag).
+    config.default_formatter = 'doc'
+  end
+  config.order = :random
+  Kernel.srand config.seed # seed Ruby's RNG from RSpec's seed so rand-based lets are reproducible for a given seed
+  Capybara.javascript_driver = :poltergeist_billy # NOTE(review): driver is not registered in this file; presumably by billy/capybara/rspec - confirm
+  Capybara.default_driver = :poltergeist_billy
+#  config.profile_examples = 10
+end