pageflow-chart 2.1.0 → 2.4.0
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/CHANGELOG.md +12 -8
- data/README.md +2 -2
- data/app/assets/javascripts/pageflow/chart/consent.js +16 -0
- data/app/assets/javascripts/pageflow/chart/editor/config.js +7 -0
- data/app/assets/javascripts/pageflow/chart/editor/models/scraped_site.js +11 -50
- data/app/assets/javascripts/pageflow/chart/editor/views/configuration_editor.js +6 -4
- data/app/assets/javascripts/pageflow/chart/editor/views/embedded/iframe_embedded_view.js +32 -16
- data/app/assets/javascripts/pageflow/chart/editor/views/inputs/scraped_url_input_view.js +18 -39
- data/app/assets/javascripts/pageflow/chart/editor.js +4 -3
- data/app/assets/javascripts/pageflow/chart/page_type.js +61 -53
- data/app/assets/javascripts/pageflow/chart.js +2 -3
- data/app/assets/stylesheets/pageflow/chart/editor.scss +3 -20
- data/app/assets/stylesheets/pageflow/chart/themes/default.scss +3 -0
- data/app/assets/stylesheets/pageflow/chart.scss +9 -16
- data/app/helpers/pageflow/chart/scraped_sites_helper.rb +17 -8
- data/app/jobs/pageflow/chart/scrape_site_job.rb +14 -4
- data/app/models/pageflow/chart/scraped_site.rb +37 -4
- data/app/views/pageflow/chart/editor/scraped_sites/_scraped_site.json.jbuilder +1 -0
- data/app/views/pageflow/chart/page.html.erb +9 -2
- data/chart.gemspec +2 -2
- data/config/locales/de.yml +4 -0
- data/config/locales/en.yml +4 -0
- data/db/migrate/20190531141820_add_file_attributes_to_scraped_sites.rb +8 -0
- data/db/migrate/20190531145431_insert_file_usages_for_scraped_sites.rb +59 -0
- data/db/migrate/20200507141608_add_javascript_body_attachment_to_scraped_site.rb +5 -0
- data/lib/pageflow/chart/configuration.rb +6 -3
- data/lib/pageflow/chart/downloader.rb +4 -1
- data/lib/pageflow/chart/page_type.rb +17 -0
- data/lib/pageflow/chart/plugin.rb +10 -0
- data/lib/pageflow/chart/refresh_tag_following_downloader.rb +3 -3
- data/lib/pageflow/chart/scraper.rb +25 -13
- data/lib/pageflow/chart/version.rb +1 -1
- data/lib/pageflow/chart.rb +4 -0
- data/spec/factories/scraped_sites.rb +17 -3
- data/spec/fixtures/all.css +3 -0
- data/spec/fixtures/all.js +1 -0
- data/spec/fixtures/all_body.js +1 -0
- data/spec/fixtures/data.csv +1 -0
- data/spec/fixtures/index.html +7 -0
- data/spec/helpers/pageflow/chart/scraped_sites_helper_spec.rb +59 -0
- data/spec/integration/file_type_spec.rb +10 -0
- data/spec/jobs/pageflow/chart/scrape_site_job_spec.rb +14 -1
- data/spec/models/pageflow/chart/scraped_site_spec.rb +54 -0
- data/spec/pageflow/chart/downloader_spec.rb +13 -3
- data/spec/pageflow/chart/refresh_tag_following_downloader_spec.rb +23 -10
- data/spec/pageflow/chart/scraper_spec.rb +201 -63
- metadata +28 -20
- data/app/assets/javascripts/pageflow/chart/editor/collections/scraped_sites_collection.js +0 -23
- data/app/assets/javascripts/pageflow/chart/editor/initializers/setup_collections.js +0 -1
- data/app/assets/javascripts/pageflow/chart/editor/templates/scraped_site_status.jst.ejs +0 -2
- data/app/assets/javascripts/pageflow/chart/editor/templates/url_input.jst.ejs +0 -7
- data/app/assets/javascripts/pageflow/chart/editor/views/scraped_site_status_view.js +0 -18
- data/app/controllers/pageflow/chart/application_controller.rb +0 -6
- data/app/controllers/pageflow/chart/scraped_sites_controller.rb +0 -25
- data/config/routes.rb +0 -3
- data/spec/controllers/pageflow/chart/scraped_sites_controller_spec.rb +0 -35
- data/spec/requests/scraping_site_spec.rb +0 -23
data/spec/models/pageflow/chart/scraped_site_spec.rb

@@ -26,5 +26,59 @@ module Pageflow::Chart
       expect(scraped_site_with_custom_theme.use_custom_theme).to eq(true)
       expect(scraped_site_without_custom_theme.use_custom_theme).to eq(false)
     end
+
+    it 'exposes all attachments for export' do
+      scraped_site = ScrapedSite.new(url: 'http://example.com/foo/index.html')
+
+      expect(scraped_site.attachments_for_export.map(&:name))
+        .to eq(%i[javascript_file javascript_body_file stylesheet_file html_file csv_file])
+    end
+
+    describe '#publish!' do
+      it 'transitions state to processing for new site' do
+        scraped_site = ScrapedSite.new(url: 'http://example.com/foo/index.html')
+
+        scraped_site.publish!
+
+        expect(scraped_site.state).to eq('processing')
+      end
+
+      it 'transitions state to processed if html file is already set ' \
+         '(e.g. for sites that have been created via entry import)' do
+        scraped_site = ScrapedSite.new(url: 'http://example.com/foo/index.html',
+                                       html_file_file_name: 'index.html')
+
+        scraped_site.publish!
+
+        expect(scraped_site.state).to eq('processed')
+      end
+    end
+
+    describe '#retryable?' do
+      it 'is true if processing_failed' do
+        scraped_site = ScrapedSite.new(url: 'http://example.com/foo/index.html',
+                                       state: 'processing_failed')
+
+        expect(scraped_site).to be_retryable
+      end
+
+      it 'is false if processed' do
+        scraped_site = ScrapedSite.new(url: 'http://example.com/foo/index.html',
+                                       state: 'processed')
+
+        expect(scraped_site).not_to be_retryable
+      end
+    end
+
+    describe '#retry!' do
+      it 'transitions state to processing if processing_failed' do
+        scraped_site = ScrapedSite.new(url: 'http://example.com/foo/index.html',
+                                       state: 'processing_failed')
+
+        scraped_site.retry!
+
+        expect(scraped_site.state).to eq('processing')
+      end
+    end
   end
 end
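The new examples pin down the publish/retry state handling of scraped sites. The model itself (data/app/models/pageflow/chart/scraped_site.rb) is not reproduced in this diff; the following plain-Ruby sketch only mirrors the transitions the specs describe, with a hypothetical class name and no persistence or attachment handling:

```ruby
# Hypothetical, simplified stand-in for the transitions exercised above.
# The real ScrapedSite is an ActiveRecord model with file attachments;
# this object only mirrors the state field behavior.
class ScrapedSiteSketch
  attr_reader :url, :state

  def initialize(url:, html_file_file_name: nil, state: nil)
    @url = url
    @html_file_file_name = html_file_file_name
    @state = state
  end

  # Sites that already carry an html file (e.g. created via entry import)
  # skip scraping and count as processed right away.
  def publish!
    @state = @html_file_file_name ? 'processed' : 'processing'
  end

  def retryable?
    state == 'processing_failed'
  end

  # Only failed sites get re-queued for scraping.
  def retry!
    @state = 'processing' if retryable?
  end
end

site = ScrapedSiteSketch.new(url: 'http://example.com/foo/index.html')
site.publish!
site.state # => "processing"
```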
data/spec/pageflow/chart/downloader_spec.rb

@@ -17,17 +17,27 @@ module Pageflow
       expect(result).to eq("aaa")
     end
 
-    it 'ignores HTTP response 404' do
+    it 'ignores HTTP response 404 by default' do
       downloader = Downloader.new
       result = ''
 
-      stub_request(:get,
+      stub_request(:get, 'http://example.com/a').to_return(status: 404, body: 'aaa')
 
       downloader.load('http://example.com/a') do |io|
         result = io.read
       end
 
-      expect(result).to eq(
+      expect(result).to eq('')
+    end
+
+    it 'supports raising error on HTTP response 404 ' do
+      downloader = Downloader.new
+
+      stub_request(:get, 'http://example.com/a').to_return(status: 404, body: 'aaa')
+
+      expect {
+        downloader.load('http://example.com/a', raise_on_http_error: true)
+      }.to raise_error(Downloader::HTTPError)
     end
 
     it 'derives protocol from base_url' do
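These specs describe a new `raise_on_http_error` option: error responses are still skipped silently by default, but can be turned into a `Downloader::HTTPError`. The actual implementation lives in data/lib/pageflow/chart/downloader.rb and is not shown here; a minimal sketch of that behavior, assuming a plain Net::HTTP request and a hypothetical class name:

```ruby
require 'net/http'
require 'stringio'
require 'uri'

# Sketch only: mirrors the option exercised by the specs above, not the
# gem's actual Downloader (which also handles base URLs and file output).
class DownloaderSketch
  HTTPError = Class.new(StandardError)

  # Yields an IO with the response body. On a non-success status the block
  # is skipped, unless raise_on_http_error is set, in which case it raises.
  def load(url, raise_on_http_error: false)
    response = Net::HTTP.get_response(URI(url))

    unless response.is_a?(Net::HTTPSuccess)
      raise HTTPError, "#{url} responded with status #{response.code}" if raise_on_http_error
      return
    end

    yield StringIO.new(response.body) if block_given?
  end
end
```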
data/spec/pageflow/chart/refresh_tag_following_downloader_spec.rb

@@ -17,7 +17,7 @@ module Pageflow
       result = ''
 
       allow(downloader).to receive(:load)
-        .with(original_url)
+        .with(original_url, {})
         .and_yield(StringIO.new(chart_html))
 
       refresh_tag_following_downloader.load_following_refresh_tags(original_url) do |file|
@@ -27,6 +27,19 @@ module Pageflow
       expect(result).to eq(chart_html)
     end
 
+    it 'passes raise_on_http_error to downloader' do
+      downloader = double(Downloader).as_null_object
+      refresh_tag_following_downloader = RefreshTagFollowingDownloader.new(downloader)
+
+      original_url = 'http://datawrapper.dwcdn.net/HPKfl/2/'
+
+      expect(downloader).to receive(:load)
+        .with(original_url, raise_on_http_error: true)
+
+      refresh_tag_following_downloader.load_following_refresh_tags(original_url,
+                                                                   raise_on_http_error: true)
+    end
+
     it 'looks for refresh meta tags and loads their url instead' do
       downloader = double(Downloader)
       refresh_tag_following_downloader = RefreshTagFollowingDownloader.new(downloader)
@@ -44,11 +57,11 @@ module Pageflow
       result = ''
 
       allow(downloader).to receive(:load)
-        .with(original_url)
+        .with(original_url, {})
         .and_yield(StringIO.new(redirect_html))
 
       allow(downloader).to receive(:load)
-        .with(target_url)
+        .with(target_url, {})
         .and_yield(StringIO.new(chart_html))
 
       refresh_tag_following_downloader.load_following_refresh_tags(original_url) do |file|
@@ -75,11 +88,11 @@ module Pageflow
       result = ''
 
       allow(downloader).to receive(:load)
-        .with(original_url)
+        .with(original_url, {})
         .and_yield(StringIO.new(redirect_html))
 
       allow(downloader).to receive(:load)
-        .with(target_url)
+        .with(target_url, {})
         .and_yield(StringIO.new(chart_html))
 
       refresh_tag_following_downloader.load_following_refresh_tags(original_url) do |file|
@@ -106,11 +119,11 @@ module Pageflow
       result = ''
 
       allow(downloader).to receive(:load)
-        .with(original_url)
+        .with(original_url, {})
         .and_yield(StringIO.new(redirect_html))
 
       allow(downloader).to receive(:load)
-        .with(target_url)
+        .with(target_url, {})
         .and_yield(StringIO.new(chart_html))
 
       refresh_tag_following_downloader.load_following_refresh_tags(original_url) do |file|
@@ -130,7 +143,7 @@ module Pageflow
         <html><head><meta http-equiv="REFRESH" content="0; url=#{original_url}"></head></html>
       HTML
 
-      allow(downloader).to receive(:load).with(original_url) do |&block|
+      allow(downloader).to receive(:load).with(original_url, {}) do |&block|
         block.call(StringIO.new(redirect_html))
       end
 
@@ -149,7 +162,7 @@ module Pageflow
         <html><head><meta http-equiv="REFRESH" content="something strange"></head></html>
       HTML
 
-      allow(downloader).to receive(:load).with(original_url).and_yield(StringIO.new(redirect_html))
+      allow(downloader).to receive(:load).with(original_url, {}).and_yield(StringIO.new(redirect_html))
 
       expect {
         refresh_tag_following_downloader.load_following_refresh_tags(original_url)
@@ -166,7 +179,7 @@ module Pageflow
         <html><head><meta http-equiv="REFRESH"></head></html>
       HTML
 
-      allow(downloader).to receive(:load).with(original_url).and_yield(StringIO.new(redirect_html))
+      allow(downloader).to receive(:load).with(original_url, {}).and_yield(StringIO.new(redirect_html))
 
       expect {
         refresh_tag_following_downloader.load_following_refresh_tags(original_url)
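The updated `.with(original_url, {})` expectations show that options are now forwarded to the wrapped downloader as a trailing hash, so `raise_on_http_error: true` also applies while a `<meta http-equiv="refresh">` redirect is being followed. A sketch of that forwarding, using a hypothetical class name and a simple regex lookup for the refresh target (the gem's implementation in data/lib/pageflow/chart/refresh_tag_following_downloader.rb may differ and also handles broken or circular refresh tags):

```ruby
require 'stringio'

# Sketch of forwarding options while following a single refresh meta tag.
class RefreshTagFollowingDownloaderSketch
  def initialize(downloader)
    @downloader = downloader
  end

  def load_following_refresh_tags(url, options = {}, &block)
    @downloader.load(url, options) do |io|
      html = io.read

      if (target_url = refresh_url(html))
        # Load the redirect target with the same options.
        @downloader.load(target_url, options, &block)
      else
        block&.call(StringIO.new(html))
      end
    end
  end

  private

  # Extracts the url from tags like <meta http-equiv="refresh" content="0; url=...">.
  def refresh_url(html)
    match = html.match(/http-equiv=["']refresh["'][^>]*url=([^"'\s>]+)/i)
    match && match[1]
  end
end
```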
data/spec/pageflow/chart/scraper_spec.rb

@@ -19,27 +19,67 @@ module Pageflow
       expect(scraper.html).to include('contents')
     end
 
-    it '
+    it 'filters blacklisted selectors' do
       html = <<-HTML
         <!DOCTYPE html>
         <html>
           <head>
-            <
-            <script type="text/javascript" src="/other.js"></script>
+            <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
           </head>
           <body>
+            <div id="bad" class="noscript"></div>
+            <div id="good"></div>
           </body>
         </html>
       HTML
-      scraper = Scraper.new(html)
+      scraper = Scraper.new(html, selector_blacklist: ['body .noscript'])
 
-      expect(HtmlFragment.new(scraper.html)).
-      expect(HtmlFragment.new(scraper.html)).
+      expect(HtmlFragment.new(scraper.html)).to have_tag('body #good')
+      expect(HtmlFragment.new(scraper.html)).not_to have_tag('body #bad')
     end
 
-
+    describe 'stylesheets in head' do
+      it 'combines link tags in head' do
+        html = <<-HTML
+          <!DOCTYPE html>
+          <html>
+            <head>
+              <link rel="stylesheet" type="text/css" href="/some.css">
+              <link rel="stylesheet" type="text/css" href="/other.css">
+            </head>
+            <body>
+            </body>
+          </html>
+        HTML
+        scraper = Scraper.new(html)
+
+        expect(HtmlFragment.new(scraper.html)).not_to have_tag('head link[href="/some.css"]')
+        expect(HtmlFragment.new(scraper.html)).to have_tag('head link[href="all.css"]')
+      end
+    end
+
+    describe 'scripts in head' do
+      it 'combines script tags in head' do
+        html = <<-HTML
+          <!DOCTYPE html>
+          <html>
+            <head>
+              <script type="text/javascript" src="/some.js"></script>
+              <script type="text/javascript" src="/other.js"></script>
+            </head>
+            <body>
+            </body>
+          </html>
+        HTML
+        scraper = Scraper.new(html)
+
+        expect(HtmlFragment.new(scraper.html)).not_to have_tag('head script[src="/some.js"]')
+        expect(HtmlFragment.new(scraper.html)).to have_tag('head script[src="all.js"]')
+      end
+
+      it 'inserts script tag at position of first script src tag to keep position ' \
         'between inline scripts' do
-
+        html = <<-HTML
          <!DOCTYPE html>
          <html>
            <head>
@@ -55,122 +95,220 @@ module Pageflow
            <body>
            </body>
          </html>
-
-
+        HTML
+        scraper = Scraper.new(html)
 
-
+        fragment = HtmlFragment.new(scraper.html)
 
-
-
-
+        expect(fragment).to have_tags_in_order('head script#setup',
+                                               'head script[src="all.js"]',
+                                               'head script#usage')
+      end
     end
 
-
-
+    describe 'scripts in body' do
+      it 'combines script tags in body' do
+        html = <<-HTML
          <!DOCTYPE html>
          <html>
            <head>
-             <link rel="stylesheet" type="text/css" href="/some.css">
-             <link rel="stylesheet" type="text/css" href="/other.css">
            </head>
            <body>
+            <script type="text/javascript" src="/some.js"></script>
+            <script type="text/javascript" src="/other.js"></script>
           </body>
         </html>
-
-
+        HTML
+        scraper = Scraper.new(html)
 
-
-
-
+        expect(HtmlFragment.new(scraper.html)).not_to have_tag('body script[src="/some.js"]')
+        expect(HtmlFragment.new(scraper.html)).to have_tag('body script[src="all_body.js"]')
+      end
 
-
-
+      it 'inserts script tag at position of first script src tag to keep position ' \
+         'between inline scripts' do
+        html = <<-HTML
          <!DOCTYPE html>
          <html>
            <head>
-             <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
           </head>
           <body>
-            <script id="
-
+            <script id="setup">
+              // Some setup required for scripts below to execute
+            </script>
+            <script type="text/javascript" src="/some.js"></script>
+            <script type="text/javascript" src="/other.js"></script>
+            <script id="usage">
+              // Some script using stuff loading above
+            </script>
           </body>
         </html>
-
-
+        HTML
+        scraper = Scraper.new(html)
+
+        fragment = HtmlFragment.new(scraper.html)
+
+        expect(fragment).to have_tags_in_order('body script#setup',
+                                               'body script[src="all_body.js"]',
+                                               'body script#usage')
+      end
 
-
-
+      it 'filters blacklisted inline scripts' do
+        html = <<-HTML
+          <!DOCTYPE html>
+          <html>
+            <head>
+              <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+            </head>
+            <body>
+              <script id="good">window.ok = true;</script>
+              <script id="bad">alert();</script>
+            </body>
+          </html>
+        HTML
+        scraper = Scraper.new(html, inline_script_blacklist: [/alert/])
+
+        expect(HtmlFragment.new(scraper.html)).to have_tag('body script#good')
+        expect(HtmlFragment.new(scraper.html)).not_to have_tag('body script#bad')
+      end
     end
+  end
 
-
-
+  describe '#javascript_urls' do
+    describe 'scripts in head' do
+      it 'returns list of urls to javascript files' do
+        html = <<-HTML
          <!DOCTYPE html>
          <html>
            <head>
-            <
+            <script type="text/javascript" src="/some.js"></script>
+            <script type="text/javascript" src="/other.js"></script>
           </head>
           <body>
-            <div id="bad" class="noscript"></div>
-            <div id="good"></div>
           </body>
         </html>
-
-
+        HTML
+        scraper = Scraper.new(html)
 
-
-
-        end
-      end
+        expect(scraper.javascript_urls_in_head).to eq(['/some.js', '/other.js'])
+      end
 
-
-
-      html = <<-HTML
+      it 'filters by blacklist' do
+        html = <<-HTML
          <!DOCTYPE html>
          <html>
            <head>
             <script type="text/javascript" src="/some.js"></script>
-            <script type="text/javascript" src="/
+            <script type="text/javascript" src="http://example.com/piwik.js"></script>
           </head>
           <body>
           </body>
         </html>
-
-
+        HTML
+        scraper = Scraper.new(html, head_script_blacklist: [/piwik/])
+
+        expect(scraper.javascript_urls_in_head).to eq(['/some.js'])
+      end
 
-
+      it 'ignores inline scripts in head' do
+        html = <<-HTML
+          <!DOCTYPE html>
+          <html>
+            <head>
+              <script type="text/javascript"></script>
+            </head>
+            <body>
+            </body>
+          </html>
+        HTML
+        scraper = Scraper.new(html)
+
+        expect(scraper.javascript_urls_in_head).to eq([])
+      end
+
+      it 'ignores scripts in body' do
+        html = <<-HTML
+          <!DOCTYPE html>
+          <html>
+            <head>
+            </head>
+            <body>
+              <script type="text/javascript" src="/some.js"></script>
+            </body>
+          </html>
+        HTML
+        scraper = Scraper.new(html)
+
+        expect(scraper.javascript_urls_in_head).to eq([])
+      end
     end
 
-
-
+    describe 'scripts in body' do
+      it 'ignores scripts in head' do
+        html = <<-HTML
          <!DOCTYPE html>
          <html>
            <head>
             <script type="text/javascript" src="/some.js"></script>
-            <script type="text/javascript" src="http://example.com/piwik.js"></script>
           </head>
           <body>
           </body>
         </html>
-
-
+        HTML
+        scraper = Scraper.new(html)
 
-
-
+        expect(scraper.javascript_urls_in_body).to eq([])
+      end
 
-
-
+      it 'returns list of urls to javascript files' do
+        html = <<-HTML
          <!DOCTYPE html>
          <html>
            <head>
-            <script type="text/javascript"></script>
           </head>
           <body>
+            <script type="text/javascript" src="/some.js"></script>
+            <script type="text/javascript" src="/other.js"></script>
           </body>
         </html>
-
-
+        HTML
+        scraper = Scraper.new(html)
+
+        expect(scraper.javascript_urls_in_body).to eq(['/some.js', '/other.js'])
+      end
+
+      it 'filters by blacklist' do
+        html = <<-HTML
+          <!DOCTYPE html>
+          <html>
+            <head>
+            </head>
+            <body>
+              <script type="text/javascript" src="/some.js"></script>
+              <script type="text/javascript" src="http://example.com/piwik.js"></script>
+            </body>
+          </html>
+        HTML
+        scraper = Scraper.new(html, body_script_blacklist: [/piwik/])
+
+        expect(scraper.javascript_urls_in_body).to eq(['/some.js'])
+      end
+
+      it 'ignores inline scripts in body' do
+        html = <<-HTML
+          <!DOCTYPE html>
+          <html>
+            <head>
+            </head>
+            <body>
+              <script type="text/javascript"></script>
+            </body>
+          </html>
+        HTML
+        scraper = Scraper.new(html)
 
-
+        expect(scraper.javascript_urls_in_body).to eq([])
+      end
     end
   end
 
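Taken together, the new scraper specs document a richer constructor (selector, head script, body script and inline script blacklists) and two query methods for the URLs of external scripts. A usage sketch as exercised by those specs, assuming the engine is loaded; the option and method names come straight from the specs, while the class itself lives in data/lib/pageflow/chart/scraper.rb and is not reproduced in this diff:

```ruby
html = <<-HTML
  <!DOCTYPE html>
  <html>
    <head><script type="text/javascript" src="/some.js"></script></head>
    <body><div class="noscript"></div></body>
  </html>
HTML

# Option names below are taken from the specs above.
scraper = Pageflow::Chart::Scraper.new(html,
                                       selector_blacklist: ['body .noscript'],
                                       head_script_blacklist: [/piwik/],
                                       body_script_blacklist: [/piwik/],
                                       inline_script_blacklist: [/alert/])

scraper.html                     # cleaned markup with combined asset references
                                 # (all.css, all.js, all_body.js)
scraper.javascript_urls_in_head  # e.g. ['/some.js']
scraper.javascript_urls_in_body  # script srcs found in <body>, minus blacklisted ones
```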