pageflow-chart 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +21 -0
- data/CHANGELOG.md +7 -0
- data/Gemfile +10 -0
- data/README.md +91 -0
- data/Rakefile +20 -0
- data/app/assets/images/pageflow/chart/fs_close_sprite.png +0 -0
- data/app/assets/images/pageflow/chart_pictogram.png +0 -0
- data/app/assets/images/pageflow/chart_pictogram_small.png +0 -0
- data/app/assets/images/pageflow/chart_sprite.png +0 -0
- data/app/assets/images/pageflow/ov-chart.png +0 -0
- data/app/assets/javascripts/pageflow/chart.js +5 -0
- data/app/assets/javascripts/pageflow/chart/asset_urls.js.erb +3 -0
- data/app/assets/javascripts/pageflow/chart/editor.js +9 -0
- data/app/assets/javascripts/pageflow/chart/editor/collections/scraped_sites_collection.js +23 -0
- data/app/assets/javascripts/pageflow/chart/editor/initializers/setup_collections.js +1 -0
- data/app/assets/javascripts/pageflow/chart/editor/models/scraped_site.js +55 -0
- data/app/assets/javascripts/pageflow/chart/editor/templates/scraped_site_status.jst.ejs +2 -0
- data/app/assets/javascripts/pageflow/chart/editor/templates/url_input.jst.ejs +7 -0
- data/app/assets/javascripts/pageflow/chart/editor/views/configuration_editor.js +26 -0
- data/app/assets/javascripts/pageflow/chart/editor/views/embedded/iframe_embedded_view.js +47 -0
- data/app/assets/javascripts/pageflow/chart/editor/views/inputs/scraped_url_input_view.js +49 -0
- data/app/assets/javascripts/pageflow/chart/editor/views/scraped_site_status_view.js +18 -0
- data/app/assets/javascripts/pageflow/chart/page_type.js +152 -0
- data/app/assets/stylesheets/pageflow/chart.css.scss +130 -0
- data/app/assets/stylesheets/pageflow/chart/custom.css.scss +209 -0
- data/app/assets/stylesheets/pageflow/chart/editor.css.scss +17 -0
- data/app/assets/stylesheets/pageflow/chart/themes/default.css.scss +10 -0
- data/app/controllers/pageflow/chart/application_controller.rb +6 -0
- data/app/controllers/pageflow/chart/scraped_sites_controller.rb +25 -0
- data/app/helpers/pageflow/chart/scraped_sites_helper.rb +13 -0
- data/app/jobs/pageflow/chart/scrape_site_job.rb +59 -0
- data/app/models/pageflow/chart/scraped_site.rb +51 -0
- data/app/views/pageflow/chart/page.html +41 -0
- data/app/views/pageflow/chart/page_type.json.jbuilder +2 -0
- data/bin/rails +8 -0
- data/chart.gemspec +30 -0
- data/config/locales/de.yml +40 -0
- data/config/locales/en.yml +22 -0
- data/config/routes.rb +3 -0
- data/db/migrate/20140417112724_create_pageflow_chart_scraped_sites.rb +14 -0
- data/lib/pageflow/chart.rb +21 -0
- data/lib/pageflow/chart/configuration.rb +63 -0
- data/lib/pageflow/chart/downloader.rb +53 -0
- data/lib/pageflow/chart/engine.rb +17 -0
- data/lib/pageflow/chart/page_type.rb +15 -0
- data/lib/pageflow/chart/scraper.rb +107 -0
- data/spec/controllers/pageflow/chart/scraped_sites_controller_spec.rb +35 -0
- data/spec/dummy/README.rdoc +28 -0
- data/spec/dummy/Rakefile +6 -0
- data/spec/dummy/app/assets/images/.keep +0 -0
- data/spec/dummy/app/assets/javascripts/application.js +13 -0
- data/spec/dummy/app/assets/stylesheets/application.css +13 -0
- data/spec/dummy/app/controllers/application_controller.rb +5 -0
- data/spec/dummy/app/controllers/concerns/.keep +0 -0
- data/spec/dummy/app/helpers/application_helper.rb +2 -0
- data/spec/dummy/app/mailers/.keep +0 -0
- data/spec/dummy/app/models/.keep +0 -0
- data/spec/dummy/app/models/concerns/.keep +0 -0
- data/spec/dummy/app/views/layouts/application.html.erb +14 -0
- data/spec/dummy/bin/bundle +3 -0
- data/spec/dummy/bin/rails +4 -0
- data/spec/dummy/bin/rake +4 -0
- data/spec/dummy/config.ru +4 -0
- data/spec/dummy/config/application.rb +22 -0
- data/spec/dummy/config/boot.rb +5 -0
- data/spec/dummy/config/database.yml +25 -0
- data/spec/dummy/config/environment.rb +5 -0
- data/spec/dummy/config/environments/development.rb +29 -0
- data/spec/dummy/config/environments/production.rb +80 -0
- data/spec/dummy/config/environments/test.rb +36 -0
- data/spec/dummy/config/initializers/backtrace_silencers.rb +7 -0
- data/spec/dummy/config/initializers/filter_parameter_logging.rb +4 -0
- data/spec/dummy/config/initializers/inflections.rb +16 -0
- data/spec/dummy/config/initializers/mime_types.rb +5 -0
- data/spec/dummy/config/initializers/secret_token.rb +12 -0
- data/spec/dummy/config/initializers/session_store.rb +3 -0
- data/spec/dummy/config/initializers/wrap_parameters.rb +14 -0
- data/spec/dummy/config/locales/en.yml +23 -0
- data/spec/dummy/config/routes.rb +4 -0
- data/spec/dummy/db/schema.rb +39 -0
- data/spec/dummy/lib/assets/.keep +0 -0
- data/spec/dummy/public/404.html +58 -0
- data/spec/dummy/public/422.html +58 -0
- data/spec/dummy/public/500.html +57 -0
- data/spec/dummy/public/favicon.ico +0 -0
- data/spec/factories/scraped_sites.rb +5 -0
- data/spec/fixtures/datawrapper.html +121 -0
- data/spec/jobs/pageflow/chart/scrape_site_job_spec.rb +22 -0
- data/spec/models/pageflow/chart/scraped_site_spec.rb +19 -0
- data/spec/pageflow/chart/downloader_spec.rb +90 -0
- data/spec/pageflow/chart/scraper_spec.rb +179 -0
- data/spec/requests/scraping_site_spec.rb +23 -0
- data/spec/spec_helper.rb +20 -0
- data/spec/support/factory_girl.rb +5 -0
- data/spec/support/html_fragment.rb +13 -0
- data/spec/support/paperclip.rb +11 -0
- data/spec/support/resque.rb +20 -0
- data/spec/support/webmock.rb +11 -0
- metadata +363 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
module Pageflow
|
|
2
|
+
module Chart
|
|
3
|
+
# JSON controller for scraped sites: creates a new one and looks up an
# existing one by id.
class ScrapedSitesController < Chart::ApplicationController
  respond_to :json

  # Creates a scraped site from the permitted parameters, fires its
  # `process!` event and responds with the record.
  #
  # `create!` is used, so a record that cannot be saved raises instead of
  # rendering errors.
  #
  # NOTE(review): the submitted url is not checked against
  # `Chart.config.supported_hosts` in this action - confirm it is
  # restricted somewhere else before it is downloaded.
  def create
    site = ScrapedSite.create!(scraped_site_params)
    site.process!

    respond_with(site)
  end

  # Responds with the scraped site identified by `params[:id]`.
  def show
    respond_with(ScrapedSite.find(params[:id]))
  end

  protected

  # Strong parameters: `url` is the only attribute accepted from the
  # `scraped_site` key of the request.
  def scraped_site_params
    params.require(:scraped_site).permit(:url)
  end
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
module Pageflow
|
|
2
|
+
module Chart
|
|
3
|
+
# Job that downloads a chart page together with its scripts, stylesheets
# and CSV data and assigns the results to the attachments of a scraped
# site.
class ScrapeSiteJob
  extend StateMachineJob
  # Queue name read by the job backend (presumably Resque - confirm).
  @queue = :scraping

  # Object used for all downloads. `perform` calls `load` and `load_all`
  # on it.
  attr_reader :downloader

  # @param downloader object responding to `load(url) { |file| }` and
  #   `load_all(urls, options) { |file| }`
  def initialize(downloader)
    @downloader = downloader
  end

  # Downloads everything belonging to `scraped_site` and assigns it to
  # the record's attachments:
  #
  # * the page at `scraped_site.url` is run through `Scraper` and the
  #   resulting HTML becomes `html_file`
  # * the scripts and stylesheets reported by the scraper become
  #   `javascript_file` and `stylesheet_file`
  # * the file at `scraped_site.csv_url` becomes `csv_file`
  #
  # @return [Symbol] always `:ok`
  def perform(scraped_site)
    downloader.load(scraped_site.url) do |page_file|
      scraper = Scraper.new(page_file.read, Chart.config.scraper_options)
      scraped_site.html_file = StringIOWithContentType.new(
        scraper.html,
        file_name: 'file.html',
        content_type: 'text/html'
      )

      # Presumably `separator` is placed between the downloaded files.
      # For scripts the semicolon protects against a file whose last
      # statement is not terminated.
      downloader.load_all(scraper.javascript_urls, extension: '.js', separator: "\n;") do |javascript_file|
        scraped_site.javascript_file = javascript_file
      end

      # Stylesheets are separated by a plain newline. A stray `;` at the
      # top level of a stylesheet is not valid CSS and makes the parser
      # drop the rule that follows it.
      downloader.load_all(scraper.stylesheet_urls, extension: '.css', separator: "\n") do |stylesheet_file|
        scraped_site.stylesheet_file = stylesheet_file
      end
    end

    downloader.load(scraped_site.csv_url) do |csv_file|
      scraped_site.csv_file = csv_file
    end

    :ok
  end

  # Entry point with the `(record, options)` signature. `options` is
  # accepted but not used.
  def self.perform_with_result(scraped_site, options = {})
    # This is where the downloader passed to `initialize` is created.
    new(Downloader.new(base_url: scraped_site.url)).perform(scraped_site)
  end
end
|
|
43
|
+
|
|
44
|
+
# StringIO that additionally reports a content type and a file name, so
# an in-memory string can be assigned where an uploaded file is expected.
class StringIOWithContentType < StringIO
  # @param string [String] content of the IO
  # @param options [Hash] must provide `:content_type` and `:file_name`;
  #   missing keys are only noticed when the matching reader is called
  def initialize(string, options)
    @upload_metadata = options
    super(string)
  end

  # @return the value stored under `:content_type`
  # @raise [KeyError] if the key was not given
  def content_type
    @upload_metadata.fetch(:content_type)
  end

  # @return the value stored under `:file_name`
  # @raise [KeyError] if the key was not given
  def original_filename
    @upload_metadata.fetch(:file_name)
  end
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
module Pageflow
|
|
2
|
+
module Chart
|
|
3
|
+
# Record for a downloaded chart page. It keeps the source `url`, a
# processing `state` and four attachments (script, stylesheet, HTML and
# CSV data).
class ScrapedSite < ActiveRecord::Base
  has_attached_file :javascript_file, Chart.config.paperclip_options(extension: 'js')
  has_attached_file :stylesheet_file, Chart.config.paperclip_options(extension: 'css')
  has_attached_file :html_file, Chart.config.paperclip_options(extension: 'html')
  has_attached_file :csv_file, Chart.config.paperclip_options(basename: 'data', extension: 'csv')

  # unprocessed -> processing -> processed / processing_failed.
  # Entering 'processing' triggers ScrapeSiteJob; its result decides the
  # follow-up state.
  state_machine initial: 'unprocessed' do
    extend StateMachineJob::Macro

    state 'unprocessed'
    state 'processing'
    state 'processing_failed'
    state 'processed'

    # First run of a freshly created record.
    event :process do
      transition 'unprocessed' => 'processing'
    end

    # Repeats the download for finished or failed records.
    event :reprocess do
      transition 'processed' => 'processing'
      transition 'processing_failed' => 'processing'
    end

    job ScrapeSiteJob do
      on_enter 'processing'
      result ok: 'processed'
      result error: 'processing_failed'
    end
  end

  # URL of `data.csv`, resolved relative to `url`.
  #
  # @return [String]
  def csv_url
    data_uri = URI.join(url, 'data.csv')
    data_uri.to_s
  end

  # Default JSON representation extended by `html_file_url`.
  def as_json(*)
    json = super
    json.merge(html_file_url: html_file_url)
  end

  # URL of the stored HTML file.
  #
  # Returns nil while the attachment has no path. If
  # `Chart.config.scraped_sites_root_url` is present, the attachment
  # path is appended to it; otherwise the attachment's own url is used.
  def html_file_url
    return unless html_file.try(:path)

    root_url = Chart.config.scraped_sites_root_url
    return html_file.url unless root_url.present?

    File.join(root_url, html_file.path)
  end
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
<div class="blackLayer"></div>
|
|
2
|
+
<div class="content_and_background chart_page">
|
|
3
|
+
<div class="backgroundArea">
|
|
4
|
+
<%= background_image_div(configuration, 'background_image') %>
|
|
5
|
+
<%= shadow_div :opacity => configuration['gradient_opacity'] %>
|
|
6
|
+
</div>
|
|
7
|
+
|
|
8
|
+
<div class="content">
|
|
9
|
+
<div class="iframeWrapper">
|
|
10
|
+
<iframe data-src="<%= scraped_site_url(configuration['scraped_site_id']) %>"
|
|
11
|
+
style="width: 100%; height: 100%"
|
|
12
|
+
name="°"
|
|
13
|
+
scrolling="auto"
|
|
14
|
+
frameborder="0"
|
|
15
|
+
align="aus"
|
|
16
|
+
marginheight="15"
|
|
17
|
+
marginwidth="15"
|
|
18
|
+
allowfullscreen="true" mozallowfullscreen="true" webkitallowfullscreen="true">
|
|
19
|
+
</iframe>
|
|
20
|
+
<div class="iframe_overlay"></div>
|
|
21
|
+
<div class="bigscreen_toggler" tabindex="4" title="<%= t('.toggle_title') %>"><%= t('.toggle') %></div>
|
|
22
|
+
</div>
|
|
23
|
+
<div class="scroller">
|
|
24
|
+
<div>
|
|
25
|
+
<div class="contentWrapper">
|
|
26
|
+
<div class="page_header">
|
|
27
|
+
<h2>
|
|
28
|
+
<span class="tagline"><%= configuration['tagline'] %></span>
|
|
29
|
+
<span class="title"><%= configuration['title'] %></span>
|
|
30
|
+
<span class="subtitle"><%= configuration['subtitle'] %></span>
|
|
31
|
+
</h2>
|
|
32
|
+
<%= background_image_tag(configuration['background_image_id'], {"class" => "print_image"}) %>
|
|
33
|
+
</div>
|
|
34
|
+
<div class="contentText">
|
|
35
|
+
<p><%= raw configuration['text'] %></p>
|
|
36
|
+
</div>
|
|
37
|
+
</div>
|
|
38
|
+
</div>
|
|
39
|
+
</div>
|
|
40
|
+
</div>
|
|
41
|
+
</div>
|
data/bin/rails
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# This command will automatically be run when you run "rails" with Rails 4 gems installed from the root of your application.
|
|
3
|
+
|
|
4
|
+
ENGINE_ROOT = File.expand_path('../..', __FILE__)
|
|
5
|
+
ENGINE_PATH = File.expand_path('../../lib/pageflow/chart/engine', __FILE__)
|
|
6
|
+
|
|
7
|
+
require 'rails/all'
|
|
8
|
+
require 'rails/engine/commands'
|
data/chart.gemspec
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
|
|
3
|
+
Gem::Specification.new do |spec|
  # Identity and authorship.
  spec.name     = "pageflow-chart"
  spec.version  = "0.1.0"
  spec.authors  = ["Tim Fischbach"]
  spec.email    = ["tfischbach@codevise.de"]
  spec.summary  = "Pagetype for Embedded Datawrapper Charts"
  spec.homepage = ""
  spec.license  = "MIT"

  # Packaged files are whatever git tracks. Executables and test files
  # are picked from that list by directory prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Needed at runtime.
  spec.add_runtime_dependency "pageflow", "~> 0.7"
  spec.add_runtime_dependency "nokogiri"
  spec.add_runtime_dependency "paperclip"
  spec.add_runtime_dependency "state_machine"
  spec.add_runtime_dependency "state_machine_job"
  spec.add_runtime_dependency "i18n-js"

  # Needed for development and the test suite only.
  spec.add_development_dependency "bundler"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec-rails", "~> 2.0"
  spec.add_development_dependency "factory_girl_rails"
  spec.add_development_dependency "sqlite3"
  spec.add_development_dependency "webmock"
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
de:
|
|
2
|
+
activerecord:
|
|
3
|
+
attributes:
|
|
4
|
+
pageflow/page:
|
|
5
|
+
scraped_site_id: Diagramm URL
|
|
6
|
+
pageflow:
|
|
7
|
+
chart:
|
|
8
|
+
editor:
|
|
9
|
+
templates:
|
|
10
|
+
scraped_site_status:
|
|
11
|
+
failed: Beim Herunterladen des Diagramms ist ein Fehler aufgetreten.
|
|
12
|
+
pending: Diagramm wird heruntergeladen...
|
|
13
|
+
help_entries:
|
|
14
|
+
page_type:
|
|
15
|
+
menu_item: Diagramm
|
|
16
|
+
text: ! '# Diagramm
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
Einbindung von DataWrapper-Diagrammen
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
Hier kannst Du Deinen Pageflow um animierte Infografiken ergänzen. Eingebettet ist das
|
|
23
|
+
|
|
24
|
+
Diagramm in ein Hintergrund-Bild und Text. Die Infografik lässt sich durch Klicken
|
|
25
|
+
|
|
26
|
+
vergrössern.
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
Dein Diagramm musst Du allerdings zuvor extern erstellen und einen entsprechenden Link generieren. Unter www.datawrapper.de findest Du Beispiele und die Konditionen, um
|
|
30
|
+
|
|
31
|
+
diesen Dienst in Anspruch zu nehmen.
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
Typische Anwendungsbeispiele: Statistiken, Diagramme, Zahlen&Fakten'
|
|
35
|
+
page:
|
|
36
|
+
toggle: Ansicht vergrößern bzw. verkleinern
|
|
37
|
+
toggle_title: Ansicht vergrößern bzw. verkleinern
|
|
38
|
+
page_type_category_name: Daten und Diagramme
|
|
39
|
+
page_type_description: Einbindung von Diagrammen, die mit DataWrapper erstellt wurden
|
|
40
|
+
page_type_name: Diagramm
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
en:
|
|
2
|
+
activerecord:
|
|
3
|
+
attributes:
|
|
4
|
+
pageflow/page:
|
|
5
|
+
scraped_site_id: Chart URL
|
|
6
|
+
pageflow:
|
|
7
|
+
chart:
|
|
8
|
+
editor:
|
|
9
|
+
templates:
|
|
10
|
+
scraped_site_status:
|
|
11
|
+
failed: Chart download failed.
|
|
12
|
+
pending: Downloading chart...
|
|
13
|
+
help_entries:
|
|
14
|
+
page_type:
|
|
15
|
+
menu_item: Chart
|
|
16
|
+
text: ! "# Chart\n\nIntegration of a DataWrapper-Diagram\n\nHere you can add animated infographics to your Pageflow. The diagram is embedded into a background-picture and text. To enlarge the graphic you simply have to click on it. \n\nBut first of all you have to create your graphic externally and generate a link. You can find examples and requirements for this under www.datawrapper.de.\n\nExamples of application: statistics, diagrams, numbers & facts"
|
|
17
|
+
page:
|
|
18
|
+
toggle: Toggle
|
|
19
|
+
toggle_title: Toggle
|
|
20
|
+
page_type_category_name: Data and Charts
|
|
21
|
+
page_type_description: Embedded DataWrapper chart
|
|
22
|
+
page_type_name: Chart
|
data/config/routes.rb
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Creates the `pageflow_chart_scraped_sites` table with the source url,
# the state column and the attachment columns of the four stored files.
class CreatePageflowChartScrapedSites < ActiveRecord::Migration
  def change
    create_table :pageflow_chart_scraped_sites do |table|
      table.string :url
      table.string :state

      # One set of attachment columns per stored file, in this order.
      [:html_file, :javascript_file, :stylesheet_file, :csv_file].each do |attachment_name|
        table.attachment attachment_name
      end

      table.timestamps
    end
  end
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
require 'paperclip'
|
|
2
|
+
require 'state_machine'
|
|
3
|
+
require 'state_machine_job'
|
|
4
|
+
|
|
5
|
+
require 'pageflow/chart/engine'
|
|
6
|
+
|
|
7
|
+
module Pageflow
  module Chart
    class << self
      # Configuration shared by the whole plugin. Created on first
      # access and reused afterwards.
      def config
        @config ||= Chart::Configuration.new
      end

      # Calls the given block with the shared configuration:
      #
      #   Pageflow::Chart.configure do |config|
      #     config.scraped_sites_root_url = '/datawrapper'
      #   end
      def configure(&block)
        block.call(config)
      end

      # Returns a new page type object on every call.
      def page_type
        Chart::PageType.new
      end
    end
  end
end
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
module Pageflow
|
|
2
|
+
module Chart
|
|
3
|
+
# Settings of the chart plugin. See `initialize` for the defaults.
class Configuration
  # Options passed to Scraper when it is created in ScrapeSiteJob.
  #
  # Keys of the default hash:
  #
  # * `:head_script_blacklist` [Array<Regexp>] - script tags in the page
  #   head are ignored if they match any of these regexes.
  # * `:inline_script_blacklist` [Array<Regexp>] - inline script tags
  #   are ignored if they match any of these regexes.
  # * `:selector_blacklist` [Array<String>] - HTML elements matched by
  #   these selectors are not scraped.
  #
  # @return [Hash]
  attr_reader :scraper_options

  # Prefix of the `path` option generated for paperclip attachments.
  attr_accessor :paperclip_base_path

  # If present, this replaces the host/domain name of the URL paperclip
  # returns for the scraped HTML file.
  #
  # This can be used to circumvent the same-domain policy, e.g. by
  # setting it to "/datawrapper" and redirecting from there to the S3
  # host alias that holds the files.
  attr_accessor :scraped_sites_root_url

  # Options for paperclip attachments which are supposed to use s3
  # storage. Everything accepted by paperclip's `has_attached_file` is
  # allowed. The values are merged on top of the option of the same name
  # from the Pageflow configuration
  # (`config/initializers/pageflow.rb`).
  #
  # @return [Hash]
  attr_accessor :paperclip_s3_default_options

  # White list of URL prefixes (including protocol) of scraped sites.
  #
  # @return [Array<String>]
  attr_reader :supported_hosts

  def initialize
    @paperclip_base_path = ':host'
    @paperclip_s3_default_options = {}
    @scraped_sites_root_url = nil
    @supported_hosts = ['http://cf.datawrapper.de']
    @scraper_options = {
      head_script_blacklist: [/piwik/],
      inline_script_blacklist: [/piwik/],
      selector_blacklist: ['body .noscript']
    }
  end

  # Builds the options hash for one attachment. Later entries win:
  # Pageflow's s3 defaults, then the generated `path`, then
  # `paperclip_s3_default_options` of this configuration.
  #
  # @param options [Hash] `:extension` is required, `:basename` is
  #   optional
  # @api private
  def paperclip_options(options = {})
    pageflow_defaults = Pageflow.config.paperclip_s3_default_options
    path_options = default_paperclip_path_options(options)

    pageflow_defaults
      .deep_merge(path_options)
      .deep_merge(paperclip_s3_default_options)
  end

  private

  # Storage path below `paperclip_base_path`. The file is called
  # "all.<extension>" unless a `:basename` is given.
  def default_paperclip_path_options(options)
    base_path = paperclip_base_path
    file_name = "#{options.fetch(:basename, 'all')}.#{options.fetch(:extension)}"

    { path: File.join(base_path, ":class/:id_partition/#{file_name}") }
  end
end
|
|
62
|
+
end
|
|
63
|
+
end
|