pageflow-chart 2.1.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +1 -0
  3. data/CHANGELOG.md +12 -8
  4. data/README.md +2 -2
  5. data/app/assets/javascripts/pageflow/chart/consent.js +16 -0
  6. data/app/assets/javascripts/pageflow/chart/editor/config.js +7 -0
  7. data/app/assets/javascripts/pageflow/chart/editor/models/scraped_site.js +11 -50
  8. data/app/assets/javascripts/pageflow/chart/editor/views/configuration_editor.js +6 -4
  9. data/app/assets/javascripts/pageflow/chart/editor/views/embedded/iframe_embedded_view.js +32 -16
  10. data/app/assets/javascripts/pageflow/chart/editor/views/inputs/scraped_url_input_view.js +18 -39
  11. data/app/assets/javascripts/pageflow/chart/editor.js +4 -3
  12. data/app/assets/javascripts/pageflow/chart/page_type.js +61 -53
  13. data/app/assets/javascripts/pageflow/chart.js +2 -3
  14. data/app/assets/stylesheets/pageflow/chart/editor.scss +3 -20
  15. data/app/assets/stylesheets/pageflow/chart/themes/default.scss +3 -0
  16. data/app/assets/stylesheets/pageflow/chart.scss +9 -16
  17. data/app/helpers/pageflow/chart/scraped_sites_helper.rb +17 -8
  18. data/app/jobs/pageflow/chart/scrape_site_job.rb +14 -4
  19. data/app/models/pageflow/chart/scraped_site.rb +37 -4
  20. data/app/views/pageflow/chart/editor/scraped_sites/_scraped_site.json.jbuilder +1 -0
  21. data/app/views/pageflow/chart/page.html.erb +9 -2
  22. data/chart.gemspec +2 -2
  23. data/config/locales/de.yml +4 -0
  24. data/config/locales/en.yml +4 -0
  25. data/db/migrate/20190531141820_add_file_attributes_to_scraped_sites.rb +8 -0
  26. data/db/migrate/20190531145431_insert_file_usages_for_scraped_sites.rb +59 -0
  27. data/db/migrate/20200507141608_add_javascript_body_attachment_to_scraped_site.rb +5 -0
  28. data/lib/pageflow/chart/configuration.rb +6 -3
  29. data/lib/pageflow/chart/downloader.rb +4 -1
  30. data/lib/pageflow/chart/page_type.rb +17 -0
  31. data/lib/pageflow/chart/plugin.rb +10 -0
  32. data/lib/pageflow/chart/refresh_tag_following_downloader.rb +3 -3
  33. data/lib/pageflow/chart/scraper.rb +25 -13
  34. data/lib/pageflow/chart/version.rb +1 -1
  35. data/lib/pageflow/chart.rb +4 -0
  36. data/spec/factories/scraped_sites.rb +17 -3
  37. data/spec/fixtures/all.css +3 -0
  38. data/spec/fixtures/all.js +1 -0
  39. data/spec/fixtures/all_body.js +1 -0
  40. data/spec/fixtures/data.csv +1 -0
  41. data/spec/fixtures/index.html +7 -0
  42. data/spec/helpers/pageflow/chart/scraped_sites_helper_spec.rb +59 -0
  43. data/spec/integration/file_type_spec.rb +10 -0
  44. data/spec/jobs/pageflow/chart/scrape_site_job_spec.rb +14 -1
  45. data/spec/models/pageflow/chart/scraped_site_spec.rb +54 -0
  46. data/spec/pageflow/chart/downloader_spec.rb +13 -3
  47. data/spec/pageflow/chart/refresh_tag_following_downloader_spec.rb +23 -10
  48. data/spec/pageflow/chart/scraper_spec.rb +201 -63
  49. metadata +28 -20
  50. data/app/assets/javascripts/pageflow/chart/editor/collections/scraped_sites_collection.js +0 -23
  51. data/app/assets/javascripts/pageflow/chart/editor/initializers/setup_collections.js +0 -1
  52. data/app/assets/javascripts/pageflow/chart/editor/templates/scraped_site_status.jst.ejs +0 -2
  53. data/app/assets/javascripts/pageflow/chart/editor/templates/url_input.jst.ejs +0 -7
  54. data/app/assets/javascripts/pageflow/chart/editor/views/scraped_site_status_view.js +0 -18
  55. data/app/controllers/pageflow/chart/application_controller.rb +0 -6
  56. data/app/controllers/pageflow/chart/scraped_sites_controller.rb +0 -25
  57. data/config/routes.rb +0 -3
  58. data/spec/controllers/pageflow/chart/scraped_sites_controller_spec.rb +0 -35
  59. data/spec/requests/scraping_site_spec.rb +0 -23

data/spec/models/pageflow/chart/scraped_site_spec.rb
@@ -26,5 +26,59 @@ module Pageflow::Chart
  expect(scraped_site_with_custom_theme.use_custom_theme).to eq(true)
  expect(scraped_site_without_custom_theme.use_custom_theme).to eq(false)
  end
+
+ it 'exposes all attachments for export' do
+ scraped_site = ScrapedSite.new(url: 'http://example.com/foo/index.html')
+
+ expect(scraped_site.attachments_for_export.map(&:name))
+ .to eq(%i[javascript_file javascript_body_file stylesheet_file html_file csv_file])
+ end
+
+ describe '#publish!' do
+ it 'transitions state to processing for new site' do
+ scraped_site = ScrapedSite.new(url: 'http://example.com/foo/index.html')
+
+ scraped_site.publish!
+
+ expect(scraped_site.state).to eq('processing')
+ end
+
+ it 'transitions state to processed if html file is already set ' \
+ '(e.g. for sites that have been created via entry import)' do
+ scraped_site = ScrapedSite.new(url: 'http://example.com/foo/index.html',
+ html_file_file_name: 'index.html')
+
+ scraped_site.publish!
+
+ expect(scraped_site.state).to eq('processed')
+ end
+ end
+
+ describe '#retryable?' do
+ it 'is true if processing_failed' do
+ scraped_site = ScrapedSite.new(url: 'http://example.com/foo/index.html',
+ state: 'processing_failed')
+
+ expect(scraped_site).to be_retryable
+ end
+
+ it 'is false if processed' do
+ scraped_site = ScrapedSite.new(url: 'http://example.com/foo/index.html',
+ state: 'processed')
+
+ expect(scraped_site).not_to be_retryable
+ end
+ end
+
+ describe '#retry!' do
+ it 'transitions state to processing if processing_failed' do
+ scraped_site = ScrapedSite.new(url: 'http://example.com/foo/index.html',
+ state: 'processing_failed')
+
+ scraped_site.retry!
+
+ expect(scraped_site.state).to eq('processing')
+ end
+ end
  end
  end
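
The new model specs above pin down a small state machine around publish!, retryable? and retry!. As a rough illustration only (the gem's actual ScrapedSite is an ActiveRecord model with Paperclip attachments and a background scrape job; the class below is a hypothetical stand-in), the expected transitions can be sketched like this:

# Hypothetical stand-in that satisfies just the transitions exercised above;
# not the gem's actual ScrapedSite model.
class ScrapedSiteStateSketch
  attr_reader :url, :html_file_file_name, :state

  def initialize(url:, html_file_file_name: nil, state: 'unprocessed')
    @url = url
    @html_file_file_name = html_file_file_name
    @state = state
  end

  # Sites that already carry an html file (e.g. created via entry import)
  # skip scraping and count as processed right away.
  def publish!
    @state = html_file_file_name ? 'processed' : 'processing'
  end

  # Only failed scrapes may be retried.
  def retryable?
    state == 'processing_failed'
  end

  def retry!
    @state = 'processing' if retryable?
  end
end

Calling publish! on a freshly built sketch object leaves state at 'processing', matching the first new example; passing html_file_file_name yields 'processed', matching the entry-import case.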

data/spec/pageflow/chart/downloader_spec.rb
@@ -17,17 +17,27 @@ module Pageflow
  expect(result).to eq("aaa")
  end

- it 'ignores HTTP response 404' do
+ it 'ignores HTTP response 404 by default' do
  downloader = Downloader.new
  result = ''

- stub_request(:get, "http://example.com/a").to_return(status: 404, body: 'aaa')
+ stub_request(:get, 'http://example.com/a').to_return(status: 404, body: 'aaa')

  downloader.load('http://example.com/a') do |io|
  result = io.read
  end

- expect(result).to eq("")
+ expect(result).to eq('')
+ end
+
+ it 'supports raising error on HTTP response 404 ' do
+ downloader = Downloader.new
+
+ stub_request(:get, 'http://example.com/a').to_return(status: 404, body: 'aaa')
+
+ expect {
+ downloader.load('http://example.com/a', raise_on_http_error: true)
+ }.to raise_error(Downloader::HTTPError)
  end

  it 'derives protocol from base_url' do
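
The downloader spec now distinguishes two failure modes: by default a 404 yields an empty body, while raise_on_http_error: true raises Downloader::HTTPError. A minimal sketch of that behaviour, assuming a plain Net::HTTP fetch (the gem's real Downloader also handles base URLs and relative paths), could look like:

require 'net/http'
require 'stringio'

# Hypothetical sketch, not the gem's implementation.
class DownloaderSketch
  HTTPError = Class.new(StandardError)

  # Yields an IO over the response body. Non-successful responses yield an
  # empty body unless raise_on_http_error is set, in which case they raise.
  def load(url, raise_on_http_error: false)
    response = Net::HTTP.get_response(URI(url))

    if response.is_a?(Net::HTTPSuccess)
      yield StringIO.new(response.body)
    elsif raise_on_http_error
      raise HTTPError, "#{url} responded with status #{response.code}"
    else
      yield StringIO.new('')
    end
  end
end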

data/spec/pageflow/chart/refresh_tag_following_downloader_spec.rb
@@ -17,7 +17,7 @@ module Pageflow
  result = ''

  allow(downloader).to receive(:load)
- .with(original_url)
+ .with(original_url, {})
  .and_yield(StringIO.new(chart_html))

  refresh_tag_following_downloader.load_following_refresh_tags(original_url) do |file|
@@ -27,6 +27,19 @@ module Pageflow
  expect(result).to eq(chart_html)
  end

+ it 'passes raise_on_http_error to downloader' do
+ downloader = double(Downloader).as_null_object
+ refresh_tag_following_downloader = RefreshTagFollowingDownloader.new(downloader)
+
+ original_url = 'http://datawrapper.dwcdn.net/HPKfl/2/'
+
+ expect(downloader).to receive(:load)
+ .with(original_url, raise_on_http_error: true)
+
+ refresh_tag_following_downloader.load_following_refresh_tags(original_url,
+ raise_on_http_error: true)
+ end
+
  it 'looks for refresh meta tags and loads their url instead' do
  downloader = double(Downloader)
  refresh_tag_following_downloader = RefreshTagFollowingDownloader.new(downloader)
@@ -44,11 +57,11 @@ module Pageflow
  result = ''

  allow(downloader).to receive(:load)
- .with(original_url)
+ .with(original_url, {})
  .and_yield(StringIO.new(redirect_html))

  allow(downloader).to receive(:load)
- .with(target_url)
+ .with(target_url, {})
  .and_yield(StringIO.new(chart_html))

  refresh_tag_following_downloader.load_following_refresh_tags(original_url) do |file|
@@ -75,11 +88,11 @@ module Pageflow
  result = ''

  allow(downloader).to receive(:load)
- .with(original_url)
+ .with(original_url, {})
  .and_yield(StringIO.new(redirect_html))

  allow(downloader).to receive(:load)
- .with(target_url)
+ .with(target_url, {})
  .and_yield(StringIO.new(chart_html))

  refresh_tag_following_downloader.load_following_refresh_tags(original_url) do |file|
@@ -106,11 +119,11 @@ module Pageflow
  result = ''

  allow(downloader).to receive(:load)
- .with(original_url)
+ .with(original_url, {})
  .and_yield(StringIO.new(redirect_html))

  allow(downloader).to receive(:load)
- .with(target_url)
+ .with(target_url, {})
  .and_yield(StringIO.new(chart_html))

  refresh_tag_following_downloader.load_following_refresh_tags(original_url) do |file|
@@ -130,7 +143,7 @@ module Pageflow
  <html><head><meta http-equiv="REFRESH" content="0; url=#{original_url}"></head></html>
  HTML

- allow(downloader).to receive(:load).with(original_url) do |&block|
+ allow(downloader).to receive(:load).with(original_url, {}) do |&block|
  block.call(StringIO.new(redirect_html))
  end

@@ -149,7 +162,7 @@ module Pageflow
  <html><head><meta http-equiv="REFRESH" content="something strange"></head></html>
  HTML

- allow(downloader).to receive(:load).with(original_url).and_yield(StringIO.new(redirect_html))
+ allow(downloader).to receive(:load).with(original_url, {}).and_yield(StringIO.new(redirect_html))

  expect {
  refresh_tag_following_downloader.load_following_refresh_tags(original_url)
@@ -166,7 +179,7 @@ module Pageflow
  <html><head><meta http-equiv="REFRESH"></head></html>
  HTML

- allow(downloader).to receive(:load).with(original_url).and_yield(StringIO.new(redirect_html))
+ allow(downloader).to receive(:load).with(original_url, {}).and_yield(StringIO.new(redirect_html))

  expect {
  refresh_tag_following_downloader.load_following_refresh_tags(original_url)
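
All of the refresh-tag spec changes above boil down to one thing: load_following_refresh_tags now accepts an options hash and forwards it to the wrapped downloader on every request ({} when nothing is given, raise_on_http_error: true when passed), including the follow-up request triggered by a meta refresh tag. A rough sketch of that forwarding, using Nokogiri to find the refresh tag (hypothetical code, not the gem's class; error handling for malformed refresh tags is omitted):

require 'nokogiri'
require 'stringio'

# Hypothetical sketch focusing on option forwarding only.
class RefreshTagFollowingDownloaderSketch
  def initialize(downloader)
    @downloader = downloader
  end

  def load_following_refresh_tags(url, options = {}, &block)
    @downloader.load(url, options) do |io|
      html = io.read
      refresh_url = refresh_url_from(html)

      if refresh_url
        # Follow the redirect target with the same options.
        load_following_refresh_tags(refresh_url, options, &block)
      else
        block.call(StringIO.new(html))
      end
    end
  end

  private

  def refresh_url_from(html)
    meta = Nokogiri::HTML(html).css('meta').find do |tag|
      tag['http-equiv'].to_s.casecmp('refresh').zero?
    end
    meta && meta['content'] && meta['content'][/url=(.*)/i, 1]
  end
end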

data/spec/pageflow/chart/scraper_spec.rb
@@ -19,27 +19,67 @@ module Pageflow
  expect(scraper.html).to include('contents')
  end

- it 'combines script tags in head' do
+ it 'filters blacklisted selectors' do
  html = <<-HTML
  <!DOCTYPE html>
  <html>
  <head>
- <script type="text/javascript" src="/some.js"></script>
- <script type="text/javascript" src="/other.js"></script>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body>
+ <div id="bad" class="noscript"></div>
+ <div id="good"></div>
  </body>
  </html>
  HTML
- scraper = Scraper.new(html)
+ scraper = Scraper.new(html, selector_blacklist: ['body .noscript'])

- expect(HtmlFragment.new(scraper.html)).not_to have_tag('head script[src="/some.js"]')
- expect(HtmlFragment.new(scraper.html)).to have_tag('head script[src="all.js"]')
+ expect(HtmlFragment.new(scraper.html)).to have_tag('body #good')
+ expect(HtmlFragment.new(scraper.html)).not_to have_tag('body #bad')
  end

- it 'inserts script tag at position of first script src tag to keep position' \
+ describe 'stylesheets in head' do
+ it 'combines link tags in head' do
+ html = <<-HTML
+ <!DOCTYPE html>
+ <html>
+ <head>
+ <link rel="stylesheet" type="text/css" href="/some.css">
+ <link rel="stylesheet" type="text/css" href="/other.css">
+ </head>
+ <body>
+ </body>
+ </html>
+ HTML
+ scraper = Scraper.new(html)
+
+ expect(HtmlFragment.new(scraper.html)).not_to have_tag('head link[href="/some.css"]')
+ expect(HtmlFragment.new(scraper.html)).to have_tag('head link[href="all.css"]')
+ end
+ end
+
+ describe 'scripts in head' do
+ it 'combines script tags in head' do
+ html = <<-HTML
+ <!DOCTYPE html>
+ <html>
+ <head>
+ <script type="text/javascript" src="/some.js"></script>
+ <script type="text/javascript" src="/other.js"></script>
+ </head>
+ <body>
+ </body>
+ </html>
+ HTML
+ scraper = Scraper.new(html)
+
+ expect(HtmlFragment.new(scraper.html)).not_to have_tag('head script[src="/some.js"]')
+ expect(HtmlFragment.new(scraper.html)).to have_tag('head script[src="all.js"]')
+ end
+
+ it 'inserts script tag at position of first script src tag to keep position ' \
  'between inline scripts' do
- html = <<-HTML
+ html = <<-HTML
  <!DOCTYPE html>
  <html>
  <head>
@@ -55,122 +95,220 @@ module Pageflow
  <body>
  </body>
  </html>
- HTML
- scraper = Scraper.new(html)
+ HTML
+ scraper = Scraper.new(html)

- fragment = HtmlFragment.new(scraper.html)
+ fragment = HtmlFragment.new(scraper.html)

- expect(fragment).to have_tags_in_order('head script#setup',
- 'head script[src="all.js"]',
- 'head script#usage')
+ expect(fragment).to have_tags_in_order('head script#setup',
+ 'head script[src="all.js"]',
+ 'head script#usage')
+ end
  end

- it 'combines link tags in head' do
- html = <<-HTML
+ describe 'scripts in body' do
+ it 'combines script tags in body' do
+ html = <<-HTML
  <!DOCTYPE html>
  <html>
  <head>
- <link rel="stylesheet" type="text/css" href="/some.css">
- <link rel="stylesheet" type="text/css" href="/other.css">
  </head>
  <body>
+ <script type="text/javascript" src="/some.js"></script>
+ <script type="text/javascript" src="/other.js"></script>
  </body>
  </html>
- HTML
- scraper = Scraper.new(html)
+ HTML
+ scraper = Scraper.new(html)

- expect(HtmlFragment.new(scraper.html)).not_to have_tag('head link[href="/some.css"]')
- expect(HtmlFragment.new(scraper.html)).to have_tag('head link[href="all.css"]')
- end
+ expect(HtmlFragment.new(scraper.html)).not_to have_tag('body script[src="/some.js"]')
+ expect(HtmlFragment.new(scraper.html)).to have_tag('body script[src="all_body.js"]')
+ end

- it 'filters blacklisted inline scripts' do
- html = <<-HTML
+ it 'inserts script tag at position of first script src tag to keep position ' \
+ 'between inline scripts' do
+ html = <<-HTML
  <!DOCTYPE html>
  <html>
  <head>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body>
- <script id="good">window.ok = true;</script>
- <script id="bad">alert();</script>
+ <script id="setup">
+ // Some setup required for scripts below to execute
+ </script>
+ <script type="text/javascript" src="/some.js"></script>
+ <script type="text/javascript" src="/other.js"></script>
+ <script id="usage">
+ // Some script using stuff loading above
+ </script>
  </body>
  </html>
- HTML
- scraper = Scraper.new(html, inline_script_blacklist: [/alert/])
+ HTML
+ scraper = Scraper.new(html)
+
+ fragment = HtmlFragment.new(scraper.html)
+
+ expect(fragment).to have_tags_in_order('body script#setup',
+ 'body script[src="all_body.js"]',
+ 'body script#usage')
+ end

- expect(HtmlFragment.new(scraper.html)).to have_tag('body script#good')
- expect(HtmlFragment.new(scraper.html)).not_to have_tag('body script#bad')
+ it 'filters blacklisted inline scripts' do
+ html = <<-HTML
+ <!DOCTYPE html>
+ <html>
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+ </head>
+ <body>
+ <script id="good">window.ok = true;</script>
+ <script id="bad">alert();</script>
+ </body>
+ </html>
+ HTML
+ scraper = Scraper.new(html, inline_script_blacklist: [/alert/])
+
+ expect(HtmlFragment.new(scraper.html)).to have_tag('body script#good')
+ expect(HtmlFragment.new(scraper.html)).not_to have_tag('body script#bad')
+ end
  end
+ end

- it 'filters blacklisted selectors' do
- html = <<-HTML
+ describe '#javascript_urls' do
+ describe 'scripts in head' do
+ it 'returns list of urls to javascript files' do
+ html = <<-HTML
  <!DOCTYPE html>
  <html>
  <head>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+ <script type="text/javascript" src="/some.js"></script>
+ <script type="text/javascript" src="/other.js"></script>
  </head>
  <body>
- <div id="bad" class="noscript"></div>
- <div id="good"></div>
  </body>
  </html>
- HTML
- scraper = Scraper.new(html, selector_blacklist: ['body .noscript'])
+ HTML
+ scraper = Scraper.new(html)

- expect(HtmlFragment.new(scraper.html)).to have_tag('body #good')
- expect(HtmlFragment.new(scraper.html)).not_to have_tag('body #bad')
- end
- end
+ expect(scraper.javascript_urls_in_head).to eq(['/some.js', '/other.js'])
+ end

- describe '#javascript_urls' do
- it 'returns list of urls to javascript files' do
- html = <<-HTML
+ it 'filters by blacklist' do
+ html = <<-HTML
  <!DOCTYPE html>
  <html>
  <head>
  <script type="text/javascript" src="/some.js"></script>
- <script type="text/javascript" src="/other.js"></script>
+ <script type="text/javascript" src="http://example.com/piwik.js"></script>
  </head>
  <body>
  </body>
  </html>
- HTML
- scraper = Scraper.new(html)
+ HTML
+ scraper = Scraper.new(html, head_script_blacklist: [/piwik/])
+
+ expect(scraper.javascript_urls_in_head).to eq(['/some.js'])
+ end

- expect(scraper.javascript_urls).to eq(['/some.js', '/other.js'])
+ it 'ignores inline scripts in head' do
+ html = <<-HTML
+ <!DOCTYPE html>
+ <html>
+ <head>
+ <script type="text/javascript"></script>
+ </head>
+ <body>
+ </body>
+ </html>
+ HTML
+ scraper = Scraper.new(html)
+
+ expect(scraper.javascript_urls_in_head).to eq([])
+ end
+
+ it 'ignores scripts in body' do
+ html = <<-HTML
+ <!DOCTYPE html>
+ <html>
+ <head>
+ </head>
+ <body>
+ <script type="text/javascript" src="/some.js"></script>
+ </body>
+ </html>
+ HTML
+ scraper = Scraper.new(html)
+
+ expect(scraper.javascript_urls_in_head).to eq([])
+ end
  end

- it 'filters by blacklist' do
- html = <<-HTML
+ describe 'scripts in body' do
+ it 'ignores scripts in head' do
+ html = <<-HTML
  <!DOCTYPE html>
  <html>
  <head>
  <script type="text/javascript" src="/some.js"></script>
- <script type="text/javascript" src="http://example.com/piwik.js"></script>
  </head>
  <body>
  </body>
  </html>
- HTML
- scraper = Scraper.new(html, head_script_blacklist: [/piwik/])
+ HTML
+ scraper = Scraper.new(html)

- expect(scraper.javascript_urls).to eq(['/some.js'])
- end
+ expect(scraper.javascript_urls_in_body).to eq([])
+ end

- it 'ignores inline scripts in head' do
- html = <<-HTML
+ it 'returns list of urls to javascript files' do
+ html = <<-HTML
  <!DOCTYPE html>
  <html>
  <head>
- <script type="text/javascript"></script>
  </head>
  <body>
+ <script type="text/javascript" src="/some.js"></script>
+ <script type="text/javascript" src="/other.js"></script>
  </body>
  </html>
- HTML
- scraper = Scraper.new(html)
+ HTML
+ scraper = Scraper.new(html)
+
+ expect(scraper.javascript_urls_in_body).to eq(['/some.js', '/other.js'])
+ end
+
+ it 'filters by blacklist' do
+ html = <<-HTML
+ <!DOCTYPE html>
+ <html>
+ <head>
+ </head>
+ <body>
+ <script type="text/javascript" src="/some.js"></script>
+ <script type="text/javascript" src="http://example.com/piwik.js"></script>
+ </body>
+ </html>
+ HTML
+ scraper = Scraper.new(html, body_script_blacklist: [/piwik/])
+
+ expect(scraper.javascript_urls_in_body).to eq(['/some.js'])
+ end
+
+ it 'ignores inline scripts in body' do
+ html = <<-HTML
+ <!DOCTYPE html>
+ <html>
+ <head>
+ </head>
+ <body>
+ <script type="text/javascript"></script>
+ </body>
+ </html>
+ HTML
+ scraper = Scraper.new(html)

- expect(scraper.javascript_urls).to eq([])
+ expect(scraper.javascript_urls_in_body).to eq([])
+ end
  end
  end
  end
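
The reorganised scraper spec reflects that javascript_urls has been split into javascript_urls_in_head and javascript_urls_in_body, each ignoring inline scripts and honouring its own blacklist (head_script_blacklist / body_script_blacklist). A rough sketch of that split (hypothetical; the gem's Scraper additionally rewrites the document to reference the combined all.js, all_body.js and all.css files and applies selector and inline-script blacklists):

require 'nokogiri'

# Hypothetical sketch of the head/body split, not the gem's Scraper class.
class ScraperUrlsSketch
  def initialize(html, head_script_blacklist: [], body_script_blacklist: [])
    @doc = Nokogiri::HTML(html)
    @head_script_blacklist = head_script_blacklist
    @body_script_blacklist = body_script_blacklist
  end

  def javascript_urls_in_head
    script_urls('head script[src]', @head_script_blacklist)
  end

  def javascript_urls_in_body
    script_urls('body script[src]', @body_script_blacklist)
  end

  private

  # Inline scripts have no src attribute and are excluded by the selector;
  # urls matching any blacklist pattern are dropped.
  def script_urls(selector, blacklist)
    @doc.css(selector).map { |tag| tag['src'] }.reject do |url|
      blacklist.any? { |pattern| url =~ pattern }
    end
  end
end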